Example #1
    def create(self, input_storage=None, trustme=False):
        ret = super(Profile_Maker, self).create(input_storage, trustme)

        if (hasattr(theano, 'sandbox') and hasattr(theano.sandbox, 'cuda')
                and theano.sandbox.cuda.cuda_enabled):
            if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
                raise Exception(
                    "You are running the Theano profiler with CUDA enabled."
                    " Theano GPU ops execution is asynchronous by default."
                    " So by default, the profile is useless."
                    " You must set the environment variable"
                    " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                    " synchronize the execution to get a meaningful profile.")

        # create a function-specific storage container for profiling info
        profile = ProfileStats(atexit_print=False)
        self.mode.profile_stats[ret] = profile
        ret.profile = profile

        # initialize the timers
        for i, node in enumerate(ret.maker.fgraph.toposort()):
            profile.apply_time[node] = 0.0

            # a thunk_group is a list of the thunks from each linker
            # corresponding to the i'th position in the toposort.
            assert len(ret.fn.thunk_groups[i]) == 1
            profile.apply_cimpl[node] = hasattr(ret.fn.thunk_groups[i][0],
                                                'cthunk')

        # Here we replace the linker function.
        # This ugliness makes WrapLinker (an object that *generates*
        # functions and is not function-specific) work with ProfileStats
        # objects which are function-specific.

        # capture old fn in closure. This is important since new_fn is about to
        # take its place as ret.fn.
        ret_fn = ret.fn

        def new_fn():
            self.mode.apply_time = self.mode.profile_stats[ret].apply_time
            self.mode.variable_shape = \
                self.mode.profile_stats[ret].variable_shape
            ret_fn()
            # delete the old apply_time variable
            # because it doesn't mean the same thing anymore.
            # This prevents old code from looking like it still works.
            del self.mode.apply_time
            del self.mode.variable_shape

        ret.fn = new_fn

        global run_cthunk
        if run_cthunk is None and any(profile.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk  # noqa

        warnings.warn("DEPRECATION WARNING: The ProfileMode is deprecated. "
                      "Use the Theano flags/parameter to theano.function "
                      "'profile=True' instead of 'mode=ProfileMode'")
        return ret
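
# Usage sketch (illustrative, not part of Profile_Maker): the CUDA check above
# requires CUDA_LAUNCH_BLOCKING to be set before the CUDA driver initializes,
# i.e. before theano is imported. This assumes the deprecated 'ProfileMode'
# mode string; per-op GPU timings are only meaningful with synchronous
# kernel launches.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # must precede `import theano`

import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], x * 2, mode='ProfileMode')
f([1.0, 2.0, 3.0])  # timings accumulate in the ProfileStats set as f.profile
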
Example #2
    def create(self, input_storage=None, trustme=False):
        ret = super(Profile_Maker, self).create(input_storage, trustme)

        if (hasattr(theano, 'sandbox') and
            hasattr(theano.sandbox, 'cuda') and
            theano.sandbox.cuda.cuda_enabled):
            if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
                raise Exception(
                    "You are running the Theano profiler with CUDA enabled."
                    " Theano GPU ops execution is asynchronous by default."
                    " So by default, the profile is useless."
                    " You must set the environment variable"
                    " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                    " synchronize the execution to get a meaningful profile.")

        # create a function-specific storage container for profiling info
        profile = ProfileStats(atexit_print=False)
        self.mode.profile_stats[ret] = profile
        ret.profile = profile

        # initialize the timers
        for i, node in enumerate(ret.maker.fgraph.toposort()):
            profile.apply_time[node] = 0.0

            # a thunk_group is a list of the thunks from each linker
            # corresponding to the i'th position in the toposort.
            assert len(ret.fn.thunk_groups[i]) == 1
            profile.apply_cimpl[node] = hasattr(
                    ret.fn.thunk_groups[i][0],
                    'cthunk')

        # Here we replace the linker function.
        # This ugliness makes WrapLinker (an object that *generates*
        # functions and is not function-specific) work with ProfileStats
        # objects which are function-specific.

        # capture old fn in closure. This is important since new_fn is about to
        # take its place as ret.fn.
        ret_fn = ret.fn

        def new_fn():
            self.mode.apply_time = self.mode.profile_stats[ret].apply_time
            self.mode.variable_shape = self.mode.profile_stats[ret].variable_shape
            ret_fn()
            # delete the old apply_time variable
            # because it doesn't mean the same thing anymore.
            # This prevents old code from looking like it still works.
            del self.mode.apply_time
            del self.mode.variable_shape

        ret.fn = new_fn

        global run_cthunk
        if run_cthunk is None and any(profile.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk

        return ret
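
# The `ret_fn = ret.fn` / `ret.fn = new_fn` replacement above is a closure
# wrap: per-function state is installed on the shared mode object, the
# captured original is called, then the state is deleted so stale reads fail
# loudly. Below is a self-contained sketch of the same pattern with
# hypothetical names (SharedMode, wrap_with_stats); unlike the code above it
# uses try/finally so cleanup also runs when the inner function raises.
class SharedMode(object):
    """Object shared by many generated functions, like ProfileMode."""

def wrap_with_stats(mode, stats, inner_fn):
    def new_fn():
        mode.current_stats = stats      # expose this function's stats
        try:
            inner_fn()                  # captured original, like ret_fn
        finally:
            del mode.current_stats      # old state must not look valid
    return new_fn

mode = SharedMode()
stats = {'calls': 0}
def inner():
    stats['calls'] += 1
fn = wrap_with_stats(mode, stats, inner)
fn()
assert stats['calls'] == 1 and not hasattr(mode, 'current_stats')
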
Example #3
    def create(self, input_storage=None, trustme=False):
        ret = super(Profile_Maker, self).create(input_storage, trustme)

        # create a function-specific storage container for profiling info
        profile = ProfileStats(atexit_print=False)
        self.mode.profile_stats[ret] = profile
        ret.profile = profile

        # initialize the timers
        for i, node in enumerate(ret.maker.fgraph.toposort()):
            profile.apply_time[node] = 0.0
            profile.outputs_size[node] = [0.0] * len(node.outputs)

            # a thunk_group is a list of the thunks from each linker
            # corresponding to the i'th position in the toposort.
            assert len(ret.fn.thunk_groups[i]) == 1
            profile.apply_cimpl[node] = hasattr(
                    ret.fn.thunk_groups[i][0],
                    'cthunk')

        # Here we replace the linker function.
        # This ugliness makes WrapLinker (an object that *generates*
        # functions and is not function-specific) work with ProfileStats
        # objects which are function-specific.

        # capture old fn in closure. This is important since new_fn is about to
        # take its place as ret.fn.
        ret_fn = ret.fn

        def new_fn():
            self.mode.apply_time = self.mode.profile_stats[ret].apply_time
            self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
            ret_fn()
            # delete the old apply_time variable
            # because it doesn't mean the same thing anymore.
            # This prevents old code from looking like it still works.
            del self.mode.apply_time
            del self.mode.outputs_size

        ret.fn = new_fn

        global run_cthunk
        if run_cthunk is None and any(profile.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk

        return ret
Example #4
    def create(self, input_storage=None, trustme=False):
        ret = super(Profile_Maker, self).create(input_storage, trustme)

        # create a function-specific storage container for profiling info
        profile = ProfileStats(atexit_print=False)
        self.mode.profile_stats[ret] = profile
        ret.profile = profile

        # initialize the timers
        for i, node in enumerate(ret.maker.env.toposort()):
            profile.apply_time[node] = 0.0
            profile.outputs_size[node] = [0.0] * len(node.outputs)

            # a thunk_group is a list of the thunks from each linker
            # corresponding to the i'th position in the toposort.
            assert len(ret.fn.thunk_groups[i]) == 1
            profile.apply_cimpl[node] = hasattr(
                    ret.fn.thunk_groups[i][0],
                    'cthunk')

        # Here we replace the linker function.
        # This ugliness makes WrapLinker (an object that *generates*
        # functions and is not function-specific) work with ProfileStats
        # objects which are function-specific.

        # capture old fn in closure. This is important since new_fn is about to
        # take its place as ret.fn.
        ret_fn = ret.fn

        def new_fn():
            self.mode.apply_time = self.mode.profile_stats[ret].apply_time
            self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
            ret_fn()
            # delete the old apply_time variable
            # because it doesn't mean the same thing anymore.
            # This prevents old code from looking like it still works.
            del self.mode.apply_time
            del self.mode.outputs_size

        ret.fn = new_fn

        global run_cthunk
        if run_cthunk is None and any(profile.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk

        return ret
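
# The `global run_cthunk` block above follows a lazy-import idiom: a
# module-level name starts as None, and the `from ... import` statement
# inside the function rebinds the *global* because of the `global`
# declaration, so the costly import (one that triggers C compilation) is
# paid only on first use. A self-contained sketch with a cheap stand-in:
heavy_fn = None  # module-level placeholder, like run_cthunk

def call_heavy_fn(x):
    global heavy_fn
    if heavy_fn is None:
        # Deferred import: this binds the global declared above, exactly as
        # the run_cthunk import does in the code above.
        from math import sqrt as heavy_fn
    return heavy_fn(x)

assert call_heavy_fn(9.0) == 3.0
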
Example #5
def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
        no_default_updates=False, accept_inplace=False, name=None,
        rebuild_strict=True, allow_input_downcast=None,
        profile=None, on_unused_input=None):
    """Function-constructor for graphs with shared variables.

    :type params: list of either Variable or Param instances.
    :param params: function parameters; these are not allowed to be shared
    variables

    :type outputs: list of Variables or Out instances
    :param outputs: expressions to compute

    :type mode: string or `theano.compile.Mode` instance.
    :param mode: compilation mode

    :type updates: iterable over pairs (shared_variable, new_expression). List, tuple or dict.
    :param updates: update the values for SharedVariable inputs according to these expressions

    :type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict.  The Var1
    and Var2 in each pair must have the same Type.

    :param givens: specific substitutions to make in the computation graph (Var2 replaces
    Var1).

    :type no_default_updates: either bool or list of Variables
    :param no_default_updates: if True, do not perform any automatic update on Variables.
    If False (default), perform them all. Else, perform automatic updates on all Variables
    that are neither in "updates" nor in "no_default_updates".

    :type name: None or string
    :param name: attaches a name to the profiling result of this function when
    using ProfileMode (which is deprecated).

    :type allow_input_downcast: Boolean
    :param allow_input_downcast: True means that the values passed as
    inputs when calling the function can be silently downcasted to fit
    the dtype of the corresponding Variable, which may lose precision.
    False means that it will only be cast to a more general, or
    precise, type. None (default) is almost like False, but allows
    downcasting of Python float scalars to floatX.

    :type profile: None, True, str, or ProfileStats instance
    :param profile: accumulate profiling information into a given ProfileStats
    instance. None is the default, and means to use the value of
    config.profile.
    If argument is `True` then a new ProfileStats instance will be
    used.  If argument is a string, a new ProfileStats instance will be created
    with that string as its `message` attribute.  This profiling object will be
    available via self.profile.

    :type on_unused_input: str
    :param on_unused_input: What to do if a variable in the 'inputs' list
        is not used in the graph. Possible values are 'raise', 'warn',
        'ignore' and None.


    :rtype: theano.compile.Function
    :returns: a callable object that will compute the outputs (given the inputs)
    and update the implicit function arguments according to the `updates`.


    :note: Regarding givens: Be careful to make sure that these substitutions are
    independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
    another expression is undefined.  Replacements specified with givens are different from
    optimizations in that Var2 is not expected to be equivalent to Var1.

    """
    #
    # This function works by cloning the graph (except for the inputs), and then shipping it
    # off to compile.function
    # (There it will be cloned again, unnecessarily, because it doesn't know that we already
    # cloned it.)
    #
    # First, it clones the replacements named in the givens argument, and points each Var1 to
    # the clone of Var2.
    # Then it sets the inputs in the clone dictionary.
    # After these steps, we are assuming that the clone dictionary contains all the inputs to
    # the computation graph.
    #
    # Then it clones the outputs and the update expressions.  This rebuilds a computation graph
    # from the inputs and the givens.
    #
    if updates is None:
        updates = []
    if givens is None:
        givens = []
    if profile is None:
        profile = config.profile
        # profile -> True or False
    if profile is True:
        profile = ProfileStats(message=name)
        # profile -> object
    if isinstance(profile, str):
        profile = ProfileStats(message=profile)
    # profile is typically either False or an object at this point.
    # No need to block other objects being passed through though. It might be
    # useful.

    if not isinstance(params, (list, tuple)):
        raise Exception("in pfunc() the first argument must be a list or a tuple")

    if not isinstance(no_default_updates, (bool, list)):
        raise TypeError("no_default_updates should be either a boolean or "
                        "a list")

    # transform params into theano.compile.In objects.
    inputs = [_pfunc_param_to_in(p, allow_downcast=allow_input_downcast)
              for p in params]

    # Check if some variable is present more than once in inputs
    in_variables = [input.variable for input in inputs]
    for i, v in enumerate(in_variables):
        if v in in_variables[(i + 1):]:
            dup_v_i = in_variables.index(v, i + 1)
            raise UnusedInputError(
                    ("Variable %s is used twice in inputs to theano.function, "
                     "at indices %i and %i.  This would result in values "
                     "provided for it being ignored. Please do not duplicate "
                     "variables in the inputs list." % (v, i, dup_v_i)))

    output_vars = rebuild_collect_shared(outputs,
                                         in_variables,
                                         replace=givens,
                                         updates=updates,
                                         rebuild_strict=rebuild_strict,
                                         copy_inputs_over=True,
                                         no_default_updates=no_default_updates)
    # extracting the arguments
    input_variables, cloned_outputs, other_stuff = output_vars
    clone_d, update_d, update_expr, shared_inputs = other_stuff

    for i, iv in zip(inputs, input_variables):
        i.variable = iv

    for sv in shared_inputs:
        # The shared variable's container is passed as the In value, so the
        # compiled function accesses the shared storage directly; callers
        # never need to (re-)feed shared-variable values.
        if sv in update_d:
            si = In(variable=sv, value=sv.container, mutable=True,
                    borrow=True, update=update_d[sv], shared=True)
        else:
            si = In(variable=sv, value=sv.container,
                    mutable=False, borrow=True, shared=True)
        inputs.append(si)

    return orig_function(inputs, cloned_outputs, mode,
            accept_inplace=accept_inplace, name=name, profile=profile,
            on_unused_input=on_unused_input)
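
# A minimal usage sketch for pfunc (assuming the standard theano.shared API):
# the classic accumulator, exercising the `updates` argument documented above
# and the implicit In(...) construction in the shared_inputs loop.
import theano
import theano.tensor as T

state = theano.shared(0, name='state')
inc = T.iscalar('inc')

# `updates` rewires state -> state + inc; the call returns the output value
# computed before the update is applied. Passing profile=True instead would
# attach a fresh ProfileStats, reachable afterwards as accumulate.profile.
accumulate = pfunc([inc], state, updates=[(state, state + inc)])
accumulate(2)   # returns 0, then state becomes 2
accumulate(3)   # returns 2, then state becomes 5
assert state.get_value() == 5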