Example #1
    def _custom_functions(self):
        if len(Global._objects['functions']) == 0:
            return "", ""
        from ANNarchy.parser.Extraction import extract_functions

        export = ""
        wrapper = ""
        for _, func in Global._objects['functions']:
            desc = extract_functions(func, local_global=True)[0]
            # Export
            export += ' ' * 4 + desc['return_type'] + " " + desc['name'] + '('
            for idx, arg in enumerate(desc['arg_types']):
                export += arg
                if idx < len(desc['arg_types']) - 1:
                    export += ', '
            export += ')' + '\n'

            # Wrapper
            arguments=""
            wrapper += "cpdef np.ndarray func_" + desc['name'] + '('
            for idx, arg in enumerate(desc['args']):
                # Function call
                wrapper += arg
                if idx < len(desc['args']) - 1:
                    wrapper += ', '
                # Element access
                arguments += arg + "[i]"
                if idx < len(desc['args']) - 1:
                    arguments += ', '
            wrapper += '):'
            wrapper += """
    return np.array([%(funcname)s(%(args)s) for i in range(len(%(first_arg)s))])
""" % {'funcname': desc['name'], 'first_arg' : desc['args'][0], 'args': arguments}

        return export, wrapper
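
A minimal, runnable sketch of what one pass of the loop above emits, built
from a hand-made description dict (the 'desc' layout and the clip function
are illustrative assumptions, not actual extract_functions() output):

    # Hypothetical description of a custom function clip(x, xmin, xmax)
    desc = {
        'name': 'clip',
        'return_type': 'double',
        'args': ['x', 'xmin', 'xmax'],
        'arg_types': ['double', 'double', 'double'],
    }

    # Export declaration, assembled as in the loop above
    export = ' ' * 4 + desc['return_type'] + " " + desc['name'] + '('
    export += ', '.join(desc['arg_types']) + ')\n'

    # Vectorized Cython wrapper: apply the scalar function element-wise
    arguments = ', '.join(arg + "[i]" for arg in desc['args'])
    wrapper = "cpdef np.ndarray func_" + desc['name'] + '('
    wrapper += ', '.join(desc['args']) + '):'
    wrapper += "\n    return np.array([%(funcname)s(%(args)s) for i in range(len(%(first_arg)s))])\n" % {
        'funcname': desc['name'], 'first_arg': desc['args'][0], 'args': arguments}

    print(export)
    #     double clip(double, double, double)
    print(wrapper)
    # cpdef np.ndarray func_clip(x, xmin, xmax):
    #     return np.array([clip(x[i], xmin[i], xmax[i]) for i in range(len(x))])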
Example #2
    def header_custom_functions(self):

        if len(Global._objects['functions']) == 0:
            return ""

        code = ""
        from ANNarchy.parser.Extraction import extract_functions
        for _, func in Global._objects['functions']:
            code += extract_functions(func, local_global=True)[0]['cpp'] + '\n'

        return code
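
For context, the 'functions' registry iterated here is filled on the user
side. A hedged sketch using ANNarchy's public add_function; the shape of the
resulting 'cpp' entry is an assumption reconstructed from how it is
concatenated above:

    from ANNarchy import add_function

    # Register a global custom function, usable in neuron/synapse equations
    add_function("sigmoid(x) = 1.0 / (1.0 + exp(-x))")

    # header_custom_functions() would then append one inline C++ definition
    # per registered function, roughly of the form (assumed output):
    #   inline double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }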
Example #3
    def _header_custom_functions(self):
        """
        Generate code for custom functions defined globally and are usable
        witihn neuron or synapse descriptions. These functions can only rely on
        provided arguments.
        """
        if len(Global._objects['functions']) == 0:
            return ""

        # Attention CUDA: these definitions will only work on the host side.
        code = ""
        for _, func in Global._objects['functions']:
            code += extract_functions(func, local_global=True)[0]['cpp'] + '\n'

        return code
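
The host-side caveat above matters for the CUDA backend: _generate_body()
(see the last two examples) re-targets the very same 'cpp' string for the
device by swapping the qualifier. A minimal sketch of that transformation,
with a hypothetical generated string:

    # Hypothetical 'cpp' entry produced by extract_functions()
    cpp = "inline double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }"

    # Same substitution as in _generate_body() for the CUDA paradigm
    device_cpp = cpp.replace("inline", "__device__") + '\n'
    # -> "__device__ double sigmoid(double x) { ... }"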
Example #4
    def _custom_functions(self):
        if len(Global._objects['functions']) == 0:
            return "", ""
        from ANNarchy.parser.Extraction import extract_functions

        export = ""
        wrapper = ""
        for _, func in Global._objects['functions']:
            desc = extract_functions(func, local_global=True)[0]
            # Export
            export += ' ' * 4 + desc['return_type'] + " " + desc['name'] + '('
            for idx, arg in enumerate(desc['arg_types']):
                export += arg
                if idx < len(desc['arg_types']) - 1:
                    export += ', '
            export += ')' + '\n'

            # Wrapper
            arguments = ""
            wrapper += "cpdef np.ndarray func_" + desc['name'] + '('
            for idx, arg in enumerate(desc['args']):
                # Function call
                wrapper += arg
                if idx < len(desc['args']) - 1:
                    wrapper += ', '
                # Element access
                arguments += arg + "[i]"
                if idx < len(desc['args']) - 1:
                    arguments += ', '
            wrapper += '):'
            wrapper += """
    return np.array([%(funcname)s(%(args)s) for i in range(len(%(first_arg)s))])
""" % {
                'funcname': desc['name'],
                'first_arg': desc['args'][0],
                'args': arguments
            }

        return export, wrapper
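
As a usage note, the generated cpdef wrapper vectorizes the scalar function
over NumPy arrays. A pure-Python stand-in for what the compiled wrapper does
(clip and the sample values are hypothetical):

    import numpy as np

    def clip(x, xmin, xmax):
        return min(max(x, xmin), xmax)

    # Pure-Python equivalent of the generated, compiled func_clip wrapper
    def func_clip(x, xmin, xmax):
        return np.array([clip(x[i], xmin[i], xmax[i]) for i in range(len(x))])

    func_clip(np.array([-0.5, 0.2, 1.5]), np.zeros(3), np.ones(3))
    # -> array([0. , 0.2, 1. ])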
Example #5
    def _generate_body(self):
        """
        Generate the codes 'main' library file. The generated code
        will be used in different files, dependent on the chosen
        target platform:

        * openmp: ANNarchy.cpp
        * cuda: ANNarchyHost.cu and ANNarchyDevice.cu
        """
        # struct declaration for each population
        pop_ptr = ""
        for pop in self._pop_desc:
            pop_ptr += pop['instance']

        # struct declaration for each projection
        proj_ptr = ""
        for proj in self._proj_desc:
            proj_ptr += proj['instance']

        # Code for the global operations
        glop_definition = self._body_def_glops()
        update_globalops = ""
        for pop in self._pop_desc:
            if 'gops_update' in pop.keys():
                update_globalops += pop['gops_update']

        # Reset presynaptic sums
        reset_sums = self._body_resetcomputesum_pop()

        # Compute presynaptic sums
        compute_sums = ""
        # Sum over all synapses
        if Global._check_paradigm("openmp"):
            for proj in self._proj_desc:
                compute_sums += proj["compute_psp"]

        # Init rng dist
        init_rng_dist = ""
        for pop in self._populations:
            init_rng_dist += """pop%(id)s.init_rng_dist();\n""" % {
                'id': pop.id
            }

        # Update random distributions
        rd_update_code = ""
        for desc in self._pop_desc + self._proj_desc:
            if 'rng_update' in desc.keys():
                rd_update_code += desc['rng_update']

        # Equations for the neural variables
        update_neuron = ""
        for pop in self._pop_desc:
            if 'update' in pop.keys():
                update_neuron += pop['update']

        # Enqueue delayed outputs
        delay_code = ""
        for pop in self._pop_desc:
            if 'delay_update' in pop.keys():
                delay_code += pop['delay_update']

        # Equations for the synaptic variables
        update_synapse = ""
        for proj in self._proj_desc:
            if 'update' in proj.keys():
                update_synapse += proj['update']

        # Equations for the post-events
        post_event = ""
        for proj in self._proj_desc:
            if 'post_event' in proj.keys():
                post_event += proj['post_event']

        # Structural plasticity
        structural_plasticity = self._body_structural_plasticity()

        # Early stopping
        run_until = self._body_run_until()

        # Profiling
        if self._profgen:
            prof_dict = self._profgen.generate_body_dict()
        else:
            prof_dict = Profile.ProfileGenerator(
                self._annarchy_dir, self._net_id).generate_body_dict()

        #
        # Generate the ANNarchy.cpp code; the corresponding template differs
        # greatly. For further information take a look into the corresponding
        # branches.
        #
        if Global.config['paradigm'] == "openmp":
            # custom constants
            custom_constant, _ = self._body_custom_constants()

            # code fields for the OpenMP/single-thread template
            base_dict = {
                'float_prec': Global.config['precision'],
                'pop_ptr': pop_ptr,
                'proj_ptr': proj_ptr,
                'glops_def': glop_definition,
                'initialize': self._body_initialize(),
                'init_rng_dist': init_rng_dist,
                'run_until': run_until,
                'compute_sums': compute_sums,
                'reset_sums': reset_sums,
                'update_neuron': update_neuron,
                'update_globalops': update_globalops,
                'update_synapse': update_synapse,
                'random_dist_update': rd_update_code,
                'delay_code': delay_code,
                'post_event': post_event,
                'structural_plasticity': structural_plasticity,
                'custom_constant': custom_constant,
            }

            # profiling
            base_dict.update(prof_dict)

            # complete code template
            if Global.config["num_threads"] == 1:
                return BaseTemplate.st_body_template % base_dict
            else:
                return BaseTemplate.omp_body_template % base_dict

        elif Global.config['paradigm'] == "cuda":
            # Implementation notice ( HD: 10. June, 2015 )
            #
            # The CUDA linking process is a big problem for object-oriented approaches
            # and the separation of implementation code into several files. Even in the
            # current SDK 5.0 this problem is not fully solved. Linking is available, but
            # only for small, independent code pieces, by far not sufficient for full
            # object-oriented approaches ...
            #
            # For us, this currently has one consequence: we cannot completely separate
            # the implementation of objects into several files. To keep a certain equality
            # between the structures of objects, I implemented the following workaround:
            #
            # We create the C structs holding data fields and accessors as in OpenMP. We
            # also create the kernels and their call entities in the corresponding
            # generator objects, and return the code via the descriptor dictionary.
            #
            # This ensures a consistent interface in the generators and also in the
            # generated code, but sometimes requires additional overhead. Hopefully
            # NVidia will improve their linker in the next releases, so one could
            # remove this overhead.
            psp_call = ""
            for proj in self._proj_desc:
                psp_call += proj['psp_call']

            # custom constants
            host_custom_constant, _, device_custom_constant = self._body_custom_constants()

            # custom functions
            custom_func = ""
            for pop in self._pop_desc:
                custom_func += pop['custom_func']
            for proj in self._proj_desc:
                custom_func += proj['custom_func']
            for _, func in Global._objects['functions']:
                custom_func += extract_functions(
                    func, local_global=True)[0]['cpp'].replace(
                        "inline", "__device__") + '\n'

            # pre-defined/commonly available kernels
            common_kernel = self._cuda_common_kernel(self._projections)

            pop_kernel = ""
            for pop in self._pop_desc:
                pop_kernel += pop['update_body']

            pop_update_fr = ""
            for pop in self._pop_desc:
                pop_update_fr += pop['update_FR']

            psp_kernel = ""
            for proj in self._proj_desc:
                psp_kernel += proj['psp_body']

            kernel_def = ""
            for pop in self._pop_desc:
                kernel_def += pop['update_header']

            for proj in self._proj_desc:
                kernel_def += proj['psp_header']
                kernel_def += proj['update_synapse_header']
                kernel_def += proj['postevent_header']

            delay_code = ""
            for pop in self._pop_desc:
                if 'update_delay' in pop.keys():
                    delay_code += pop['update_delay']

            syn_kernel = ""
            for proj in self._proj_desc:
                syn_kernel += proj['update_synapse_body']

            syn_call = ""
            for proj in self._proj_desc:
                syn_call += proj['update_synapse_call']

            postevent_kernel = ""
            for proj in self._proj_desc:
                postevent_kernel += proj['postevent_body']

            postevent_call = ""
            for proj in self._proj_desc:
                postevent_call += proj['postevent_call']

            clear_sums = self._body_resetcomputesum_pop()

            # global operations
            glob_ops_header, glob_ops_body = self._body_def_glops()
            kernel_def += glob_ops_header

            # determine number of threads per kernel
            threads_per_kernel = self._cuda_kernel_config()

            #  concurrent kernel execution
            stream_setup = self._cuda_stream_config()

            # memory transfers
            host_device_transfer, device_host_transfer = "", ""
            for pop in self._pop_desc + self._proj_desc:
                host_device_transfer += pop['host_to_device']
                device_host_transfer += pop['device_to_host']

            # Profiling
            if self._profgen:
                prof_dict = self._profgen.generate_body_dict()
            else:
                prof_dict = Profile.ProfileGenerator(
                    self._annarchy_dir, self._net_id).generate_body_dict()

            #
            # HD ( 31.07.2016 ):
            #
            # I'm not really sure what exactly causes the problem with this
            # atomicAdd function. If we move it into ANNarchyDevice.cu, the
            # macro seems to be evaluated wrongly and the atomicAdd() function
            # appears either twice or not at all.
            #
            # So as a "solution", the atomicAdd definition block resides in
            # ANNarchyHost and only the computation kernels are placed in
            # ANNarchyDevice. If we decide to use SDK8 as the lowest requirement,
            # one can move this kernel too.
            device_code = BaseTemplate.cuda_device_kernel_template % {
                # device stuff
                'common_kernel': common_kernel,
                'pop_kernel': pop_kernel,
                'psp_kernel': psp_kernel,
                'syn_kernel': syn_kernel,
                'glob_ops_kernel': glob_ops_body,
                'postevent_kernel': postevent_kernel,
                'custom_func': custom_func,
                'custom_constant': device_custom_constant,
                'built_in': BaseTemplate.built_in_functions +
                BaseTemplate.integer_power_cuda % {
                    'float_prec': Global.config['precision']
                },
                'float_prec': Global.config['precision']
            }

            base_dict = {
                # network definitions
                'float_prec': Global.config['precision'],
                'pop_ptr': pop_ptr,
                'proj_ptr': proj_ptr,
                'run_until': run_until,
                'clear_sums': clear_sums,
                'compute_sums': psp_call,
                'update_neuron': update_neuron,
                'update_FR': pop_update_fr,
                'update_globalops': update_globalops,
                'update_synapse': syn_call,
                'post_event': postevent_call,
                'delay_code': delay_code,
                'initialize': self._body_initialize(),
                'structural_plasticity': structural_plasticity,

                # cuda host specific
                'stream_setup': stream_setup,
                'host_device_transfer': host_device_transfer,
                'device_host_transfer': device_host_transfer,
                'kernel_def': kernel_def,
                'kernel_config': threads_per_kernel,
                'custom_constant': host_custom_constant
            }
            base_dict.update(prof_dict)

            host_code = BaseTemplate.cuda_host_body_template % base_dict
            return device_code, host_code
        else:
            raise NotImplementedError
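
Because the return shape depends on the paradigm (a single string for
OpenMP, a (device, host) pair for CUDA), callers have to dispatch on it.
A hedged sketch; the file names come from the docstring, while 'generator'
is a hypothetical instance of this class:

    body = generator._generate_body()  # 'generator' is hypothetical
    if Global.config['paradigm'] == "openmp":
        sources = {"ANNarchy.cpp": body}
    else:  # "cuda"
        device_code, host_code = body
        sources = {"ANNarchyDevice.cu": device_code,
                   "ANNarchyHost.cu": host_code}
    for filename, code in sources.items():
        with open(filename, "w") as f:
            f.write(code)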
Example #6
    def _generate_body(self):
        """
        Generate the codes 'main' library file. The generated code
        will be used in different files, dependent on the chosen
        target platform:

        * openmp: ANNarchy.cpp
        * cuda: ANNarchyHost.cu and ANNarchyDevice.cu
        """
        # struct declaration for each population
        pop_ptr = ""
        for pop in self._pop_desc:
            pop_ptr += pop['instance']

        # struct declaration for each projection
        proj_ptr = ""
        for proj in self._proj_desc:
            proj_ptr += proj['instance']

        # Code for the global operations
        glop_definition = self._body_def_glops()
        update_globalops = ""
        for pop in self._pop_desc:
            if 'gops_update' in pop.keys():
                update_globalops += pop['gops_update']

        # Reset presynaptic sums
        reset_sums = self._body_resetcomputesum_pop()

        # Compute presynaptic sums
        compute_sums = self._body_computesum_proj()

        # Update random distributions
        rd_update_code = ""
        for desc in self._pop_desc + self._proj_desc:
            if 'rng_update' in desc.keys():
                rd_update_code += desc['rng_update']

        # Equations for the neural variables
        update_neuron = ""
        for pop in self._pop_desc:
            if 'update' in pop.keys():
                update_neuron += pop['update']

        # Enqueue delayed outputs
        delay_code = ""
        for pop in self._pop_desc:
            if 'delay_update' in pop.keys():
                delay_code += pop['delay_update']

        # Equations for the synaptic variables
        update_synapse = ""
        for proj in self._proj_desc:
            if 'update' in proj.keys():
                update_synapse += proj['update']

        # Equations for the post-events
        post_event = ""
        for proj in self._proj_desc:
            if 'post_event' in proj.keys():
                post_event += proj['post_event']

        # Structural plasticity
        structural_plasticity = self._body_structural_plasticity()

        # Early stopping
        run_until = self._body_run_until()

        # Number of threads
        number_threads = "omp_set_num_threads(threads);" if Global.config['num_threads'] > 1 else ""

        # Profiling
        if self._profgen:
            prof_dict = self._profgen.generate_body_dict()
        else:
            from .Profile import ProfileGenerator
            prof_dict = ProfileGenerator(self._annarchy_dir, self._net_id).generate_body_dict()

        #
        # Generate the ANNarchy.cpp code; the corresponding template differs
        # greatly. For further information take a look into the corresponding
        # branches.
        #
        if Global.config['paradigm'] == "openmp":
            # custom constants
            custom_constant, _ = self._body_custom_constants()

            from .Template.BaseTemplate import omp_body_template
            base_dict = {
                'float_prec': Global.config['precision'],
                'pop_ptr': pop_ptr,
                'proj_ptr': proj_ptr,
                'glops_def': glop_definition,
                'initialize': self._body_initialize(),
                'run_until': run_until,
                'compute_sums': compute_sums,
                'reset_sums': reset_sums,
                'update_neuron': update_neuron,
                'update_globalops': update_globalops,
                'update_synapse': update_synapse,
                'random_dist_update': rd_update_code,
                'delay_code': delay_code,
                'post_event': post_event,
                'structural_plasticity': structural_plasticity,
                'set_number_threads': number_threads,
                'custom_constant': custom_constant,
            }

            base_dict.update(prof_dict)
            return omp_body_template % base_dict

        elif Global.config['paradigm'] == "cuda":
            # Implementation notice ( HD: 10. June, 2015 )
            #
            # The CUDA linking process is a big problem for object-oriented approaches
            # and the separation of implementation code into several files. Even in the
            # current SDK 5.0 this problem is not fully solved. Linking is available, but
            # only for small, independent code pieces, by far not sufficient for full
            # object-oriented approaches ...
            #
            # For us, this currently has one consequence: we cannot completely separate
            # the implementation of objects into several files. To keep a certain equality
            # between the structures of objects, I implemented the following workaround:
            #
            # We create the C structs holding data fields and accessors as in OpenMP. We
            # also create the kernels and their call entities in the corresponding
            # generator objects, and return the code via the descriptor dictionary.
            #
            # This ensures a consistent interface in the generators and also in the
            # generated code, but sometimes requires additional overhead. Hopefully
            # NVidia will improve their linker in the next releases, so one could
            # remove this overhead.
            psp_call = ""
            for proj in self._proj_desc:
                psp_call += proj['psp_call']

            # custom constants
            host_custom_constant, _, device_custom_constant = self._body_custom_constants()

            # custom functions
            custom_func = ""
            for pop in self._pop_desc:
                custom_func += pop['custom_func']
            for proj in self._proj_desc:
                custom_func += proj['custom_func']
            for _, func in Global._objects['functions']:
                custom_func += extract_functions(func, local_global=True)[0]['cpp'].replace("inline", "__device__") + '\n'

            pop_kernel = ""
            for pop in self._pop_desc:
                pop_kernel += pop['update_body']

            pop_update_fr = ""
            for pop in self._pop_desc:
                pop_update_fr += pop['update_FR']

            psp_kernel = ""
            for proj in self._proj_desc:
                psp_kernel += proj['psp_body']

            kernel_def = ""
            for pop in self._pop_desc:
                kernel_def += pop['update_header']

            for proj in self._proj_desc:
                kernel_def += proj['psp_header']
                kernel_def += proj['update_synapse_header']
                kernel_def += proj['postevent_header']

            delay_code = ""
            for pop in self._pop_desc:
                if 'update_delay' in pop.keys():
                    delay_code += pop['update_delay']

            syn_kernel = ""
            for proj in self._proj_desc:
                syn_kernel += proj['update_synapse_body']

            syn_call = ""
            for proj in self._proj_desc:
                syn_call += proj['update_synapse_call']

            postevent_kernel = ""
            for proj in self._proj_desc:
                postevent_kernel += proj['postevent_body']

            postevent_call = ""
            for proj in self._proj_desc:
                postevent_call += proj['postevent_call']

            clear_sums = self._body_resetcomputesum_pop()

            # global operations
            glob_ops_header, glob_ops_body = self._body_def_glops()
            kernel_def += glob_ops_header

            # determine number of threads per kernel
            threads_per_kernel = self._cuda_kernel_config()

            #  concurrent kernel execution
            stream_setup = self._cuda_stream_config()

            # memory transfers
            host_device_transfer, device_host_transfer = "", ""
            for pop in self._pop_desc + self._proj_desc:
                host_device_transfer += pop['host_to_device']
                device_host_transfer += pop['device_to_host']

            # Profiling
            if self._profgen:
                prof_dict = self._profgen.generate_body_dict()
            else:
                from .Profile import ProfileGenerator
                prof_dict = ProfileGenerator(self._annarchy_dir, self._net_id).generate_body_dict()

            #
            # HD ( 31.07.2016 ):
            #
            # I'm not really sure what exactly causes the problem with this
            # atomicAdd function. If we move it into ANNarchyDevice.cu, the
            # macro seems to be evaluated wrongly and the atomicAdd() function
            # appears either twice or not at all.
            #
            # So as a "solution", the atomicAdd definition block resides in
            # ANNarchyHost and only the computation kernels are placed in
            # ANNarchyDevice. If we decide to use SDK8 as the lowest requirement,
            # one can move this kernel too.
            from .Template.BaseTemplate import cuda_device_kernel_template, cuda_host_body_template, built_in_functions
            device_code = cuda_device_kernel_template % {
                # device stuff
                'pop_kernel': pop_kernel,
                'psp_kernel': psp_kernel,
                'syn_kernel': syn_kernel,
                'glob_ops_kernel': glob_ops_body,
                'postevent_kernel': postevent_kernel,
                'custom_func': custom_func,
                'custom_constant': device_custom_constant,
                'built_in': built_in_functions,
                'float_prec': Global.config['precision']
            }

            base_dict = {
                # network definitions
                'float_prec': Global.config['precision'],
                'pop_ptr': pop_ptr,
                'proj_ptr': proj_ptr,
                'run_until': run_until,
                'clear_sums': clear_sums,
                'compute_sums': psp_call,
                'update_neuron': update_neuron,
                'update_FR': pop_update_fr,
                'update_globalops': update_globalops,
                'update_synapse': syn_call,
                'post_event': postevent_call,
                'delay_code': delay_code,
                'initialize': self._body_initialize(),
                'structural_plasticity': structural_plasticity,

                # cuda host specific
                'stream_setup': stream_setup,
                'host_device_transfer': host_device_transfer,
                'device_host_transfer': device_host_transfer,
                'kernel_def': kernel_def,
                'kernel_config': threads_per_kernel,
                'custom_constant': host_custom_constant
            }
            base_dict.update(prof_dict)
            host_code = cuda_host_body_template % base_dict
            return device_code, host_code
        else:
            raise NotImplementedError
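
Both variants fill plain %-templates with the assembled snippets. A tiny,
self-contained illustration of that mechanism (the template text is made up;
only the substitution pattern matches the code above):

    body_template = "// generated body\n%(update_neuron)s%(update_synapse)s"

    print(body_template % {
        'update_neuron': "pop0.update();\n",
        'update_synapse': "proj0.update();\n",
    })
    # // generated body
    # pop0.update();
    # proj0.update();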