def wrap(self, wrapped, namespace):
    """Install an ``extra_ndim``-aware version of `wrapped` into `namespace`.

    Builds a new application method that flattens the first
    ``extra_ndim + 1`` dimensions of every input into one, calls the
    wrapped application, and restores the extra leading dimensions on
    every output. Both the wrapped method and a delegate are registered
    in `namespace` under the original application name.
    """
    def apply(self, application, *args, **kwargs):
        # extra_ndim is a mandatory parameter, but in order not to
        # confuse with positional inputs, it has to be extracted from
        # **kwargs
        extra_ndim = kwargs.get('extra_ndim', 0)

        # Collect inputs given positionally and by keyword.
        inputs = dict(zip(application.inputs, args))
        inputs.update(dict_subset(kwargs, application.inputs,
                                  must_have=False))
        reshaped_inputs = inputs
        # To prevent pollution of the computation graph with no-ops
        if extra_ndim > 0:
            for name, input_ in inputs.items():
                shape, ndim = input_.shape, input_.ndim
                # Remember extra_dims for reshaping the outputs correctly.
                # Does not matter from which input, since we assume
                # extra dimension match for all inputs.
                extra_dims = shape[:extra_ndim]
                # Fold the extra dimensions together with the first
                # "real" dimension into a single leading dimension.
                new_first_dim = tensor.prod(shape[:extra_ndim + 1])
                new_shape = tensor.join(
                    0, new_first_dim[None], shape[extra_ndim + 1:])
                reshaped_inputs[name] = input_.reshape(
                    new_shape, ndim=ndim - extra_ndim)
        outputs = wrapped.__get__(self, None)(**reshaped_inputs)
        if extra_ndim == 0:
            return outputs
        # Unflatten: re-expand the leading dimension of every output
        # back into the original extra dimensions.
        reshaped_outputs = []
        for output in pack(outputs):
            shape, ndim = output.shape, output.ndim
            new_shape = tensor.join(
                0, extra_dims,
                (shape[0] // tensor.prod(extra_dims))[None],
                shape[1:])
            reshaped_outputs.append(
                output.reshape(new_shape, ndim=ndim + extra_ndim))
        return reshaped_outputs

    def apply_delegate(self):
        # Delegate attribute lookups (inputs/outputs metadata) to the
        # wrapped application.
        return wrapped.__get__(self, None)

    # Rebind `apply` as a proper application with the wrapped method's
    # name and a generated docstring, then register both entries.
    apply = application(rename_function(apply, wrapped.application_name))
    apply.__doc__ = _wrapped_application_doc.format(
        _with_extra_dims_application_prefix, wrapped.brick.__name__,
        wrapped.application_name)
    apply_delegate = apply.delegate(
        rename_function(apply_delegate,
                        wrapped.application_name + "_delegate"))
    namespace[wrapped.application_name] = apply
    namespace[wrapped.application_name + "_delegate"] = apply_delegate
def scan_function(*args):
    """One scan step: run the transition function with ``iterate=False``.

    `args` are the positional inputs supplied by :func:`theano.scan`:
    current sequence elements first, then previous state values, then
    context variables. Free variables (`sequences_given`,
    `contexts_given`, `rest_kwargs`, `application`, `application_call`,
    `equizip`, `pack`) come from the enclosing scope.
    """
    args = list(args)
    # The name order must mirror the order in which scan was invoked:
    # sequences, then outputs that double as states (in output order),
    # then contexts.
    arg_names = (list(sequences_given) +
                 [output for output in application.outputs
                  if output in application.states] +
                 list(contexts_given))
    kwargs = dict(equizip(arg_names, args))
    kwargs.update(rest_kwargs)
    outputs = application(iterate=False, **kwargs)
    # We want to save the computation graph returned by the
    # `application_function` when it is called inside the
    # `theano.scan`.
    application_call.inner_inputs = args
    application_call.inner_outputs = pack(outputs)
    return outputs
def recurrent(*args, **kwargs):
    """Wraps an apply method to allow its iterative application.

    This decorator allows you to use implementation of an RNN
    transition to process sequences without writing the
    iteration-related code again and again. In the most general form
    information flow of a recurrent network can be described as
    follows: depending on the context variables and driven by input
    sequences the RNN updates its states and produces output sequences.
    Thus the input variables of your transition function play one of
    three roles: an input, a context or a state. These roles should be
    specified in the method's signature to make iteration possible.

    Parameters
    ----------
    inputs : list of strs
        Names of the arguments of the apply method that play input
        roles.
    states : list of strs
        Names of the arguments of the apply method that play state
        roles.
    contexts : list of strs
        Names of the arguments of the apply method that play context
        roles.
    outputs : list of strs
        Names of the outputs.

    """
    def recurrent_wrapper(application_function):
        arg_spec = inspect.getargspec(application_function)
        arg_names = arg_spec.args[1:]

        @wraps(application_function)
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True`` iteration is made. By default ``True``.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the :func:`theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs, application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs, application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    # NOTE(review): `iterate` is always True at this point
                    # (the iterate=False case returned early above), so this
                    # branch looks unreachable — confirm before removing.
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {key: value for key, value in kwargs.items()
                           if key not in scan_arguments}
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and
                        not is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        # A numeric initialization scheme: sample one row
                        # and broadcast it over the batch.
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        # An application computes the initial state itself.
                        kwargs[state_name] = (
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs))
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = (
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs))
                    # NOTE(review): truth-testing a symbolic variable here —
                    # presumably only guards against None; verify it does not
                    # raise with the Theano version in use.
                    assert kwargs[state_name]
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(state,
                                                        *range(state.ndim))

            def scan_function(*args):
                # One scan step: positional args are sequences, then
                # state-outputs, then contexts, in that exact order.
                args = list(args)
                arg_names = (list(sequences_given) +
                             [output for output in application.outputs
                              if output in application.states] +
                             list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs
            # Only outputs that are also states get an initial value.
            outputs_info = [
                states_given[name] if name in application.states
                else None
                for name in application.outputs]
            result, updates = theano.scan(
                scan_function, sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result

        return recurrent_apply

    # Decorator can be used with or without arguments
    assert (args and not kwargs) or (not args and kwargs)
    if args:
        application_function, = args
        return application(recurrent_wrapper(application_function))
    else:
        def wrap_application(application_function):
            return application(**kwargs)(
                recurrent_wrapper(application_function))
        return wrap_application
def wrap_application(application_function):
    """Decorate `application_function` with the configured application.

    Used when the enclosing decorator was invoked with keyword
    arguments: first builds the parameterized `application` decorator,
    then applies it to the recurrent-wrapped transition function.
    """
    parameterized_decorator = application(**kwargs)
    return parameterized_decorator(recurrent_wrapper(application_function))
def recurrent(*args, **kwargs):
    """Wraps an apply method to allow its iterative application.

    This decorator allows you to implement only one step of a recurrent
    network and enjoy applying it to sequences for free. The idea behind is
    that its most general form information flow of an RNN can be described
    as follows: depending on the context and driven by input sequences the
    RNN updates its states and produces output sequences.

    Given a method describing one step of an RNN and a specification
    which of its inputs are the elements of the input sequence,
    which are the states and which are the contexts, this decorator
    returns an application method which implements the whole RNN loop.
    The returned application method also has additional parameters,
    see documentation of the `recurrent_apply` inner function below.

    Parameters
    ----------
    sequences : list of strs
        Specifies which of the arguments are elements of input sequences.
    states : list of strs
        Specifies which of the arguments are the states.
    contexts : list of strs
        Specifies which of the arguments are the contexts.
    outputs : list of strs
        Names of the outputs. The outputs whose names match with those
        in the `state` parameter are interpreted as next step states.

    Returns
    -------
    recurrent_apply : :class:`~blocks.bricks.base.Application`
        The new application method that applies the RNN to sequences.

    See Also
    --------
    :doc:`The tutorial on RNNs </rnn>`

    """
    def recurrent_wrapper(application_function):
        arg_spec = inspect.getargspec(application_function)
        arg_names = arg_spec.args[1:]

        @wraps(application_function)
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True`` iteration is made. By default ``True``.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs, application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs, application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    # NOTE(review): `iterate` is always True here (the
                    # iterate=False case returned early above); this branch
                    # appears unreachable — confirm before removing.
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {key: value for key, value in kwargs.items()
                           if key not in scan_arguments}
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and
                        not is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            initial_states = brick.initial_states(batch_size, as_dict=True,
                                                  *args, **kwargs)
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        # A numeric initialization scheme: sample one row and
                        # broadcast it over the batch.
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        # An application computes the initial state itself.
                        kwargs[state_name] = (
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs))
                else:
                    try:
                        kwargs[state_name] = initial_states[state_name]
                    except KeyError:
                        raise KeyError(
                            "no initial state for '{}' of the brick {}".format(
                                state_name, brick.name))
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(state,
                                                        *range(state.ndim))

            def scan_function(*args):
                # One scan step: positional args are sequences, then
                # state-outputs, then contexts, in that exact order.
                args = list(args)
                arg_names = (list(sequences_given) +
                             [output for output in application.outputs
                              if output in application.states] +
                             list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs
            # Only outputs that are also states get an initial value.
            outputs_info = [
                states_given[name] if name in application.states
                else None
                for name in application.outputs]
            result, updates = theano.scan(
                scan_function, sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse,
                name='{}_{}_scan'.format(
                    brick.name, application.application_name))
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result

        return recurrent_apply

    # Decorator can be used with or without arguments
    assert (args and not kwargs) or (not args and kwargs)
    if args:
        application_function, = args
        return application(recurrent_wrapper(application_function))
    else:
        def wrap_application(application_function):
            return application(**kwargs)(
                recurrent_wrapper(application_function))
        return wrap_application
def recurrent(*args, **kwargs):
    """Wraps an apply method to allow its iterative application.

    This decorator allows you to implement only one step of a recurrent
    network and enjoy applying it to sequences for free. The idea behind is
    that its most general form information flow of an RNN can be described
    as follows: depending on the context and driven by input sequences the
    RNN updates its states and produces output sequences.

    Given a method describing one step of an RNN and a specification
    which of its inputs are the elements of the input sequence,
    which are the states and which are the contexts, this decorator
    returns an application method which implements the whole RNN loop.
    The returned application method also has additional parameters,
    see documentation of the `recurrent_apply` inner function below.

    Parameters
    ----------
    sequences : list of strs
        Specifies which of the arguments are elements of input sequences.
    states : list of strs
        Specifies which of the arguments are the states.
    contexts : list of strs
        Specifies which of the arguments are the contexts.
    outputs : list of strs
        Names of the outputs. The outputs whose names match with those
        in the `state` parameter are interpreted as next step states.

    Returns
    -------
    recurrent_apply : :class:`~blocks.bricks.base.Application`
        The new application method that applies the RNN to sequences.

    See Also
    --------
    :doc:`The tutorial on RNNs </rnn>`

    """
    def recurrent_wrapper(application_function):
        arg_spec = inspect.getargspec(application_function)
        arg_names = arg_spec.args[1:]

        @wraps(application_function)
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True`` iteration is made. By default ``True``.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs, application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs, application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    # NOTE(review): `iterate` is always True here (the
                    # iterate=False case returned early above); this branch
                    # appears unreachable — confirm before removing.
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {
                key: value
                for key, value in kwargs.items() if key not in scan_arguments
            }
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and
                        not is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            initial_states = brick.initial_states(batch_size, as_dict=True,
                                                  *args, **kwargs)
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        # A numeric initialization scheme: sample one row and
                        # broadcast it over the batch.
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        # An application computes the initial state itself.
                        kwargs[state_name] = (kwargs[state_name](state_name,
                                                                 batch_size,
                                                                 *args,
                                                                 **kwargs))
                    else:
                        try:
                            kwargs[state_name] = initial_states[state_name]
                        except KeyError:
                            raise KeyError(
                                "no initial state for '{}' of the brick {}".format(
                                    state_name, brick.name))
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(
                    state, *range(state.ndim))

            def scan_function(*args):
                # One scan step: positional args are sequences, then
                # state-outputs, then contexts, in that exact order.
                args = list(args)
                arg_names = (list(sequences_given) + [
                    output for output in application.outputs
                    if output in application.states
                ] + list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs
            # Only outputs that are also states get an initial value.
            outputs_info = [
                states_given[name] if name in application.states else None
                for name in application.outputs
            ]
            result, updates = theano.scan(
                scan_function,
                sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse,
                name='{}_{}_scan'.format(brick.name,
                                         application.application_name))
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result

        return recurrent_apply

    # Decorator can be used with or without arguments
    assert (args and not kwargs) or (not args and kwargs)
    if args:
        application_function, = args
        return application(recurrent_wrapper(application_function))
    else:
        def wrap_application(application_function):
            return application(**kwargs)(
                recurrent_wrapper(application_function))
        return wrap_application
def recurrent(*args, **kwargs):
    """Wraps an apply method to allow its iterative application.

    This decorator allows you to use implementation of an RNN
    transition to process sequences without writing the
    iteration-related code again and again. In the most general form
    information flow of a recurrent network can be described as
    follows: depending on the context variables and driven by input
    sequences the RNN updates its states and produces output sequences.
    Thus the input variables of your transition function play one of
    three roles: an input, a context or a state. These roles should be
    specified in the method's signature to make iteration possible.

    Parameters
    ----------
    inputs : list of strs
        Names of the arguments of the apply method that play input
        roles.
    states : list of strs
        Names of the arguments of the apply method that play state
        roles.
    contexts : list of strs
        Names of the arguments of the apply method that play context
        roles.
    outputs : list of strs
        Names of the outputs.

    """
    def recurrent_wrapper(application_function):
        arg_spec = inspect.getargspec(application_function)
        arg_names = arg_spec.args[1:]

        @wraps(application_function)
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True`` iteration is made. By default ``True``.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the :func:`theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration.
            iterate = kwargs.pop('iterate', True)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Separate sequences, states and contexts
            scan_arguments = (application.sequences + application.states +
                              application.contexts)

            # Check what is given and what is not
            def only_given(arg_names):
                # NOTE(review): the `kwargs.get(arg_name)` truthiness test
                # drops arguments that are present but falsy (e.g. None);
                # presumably intentional for missing/None values — confirm
                # it cannot silently drop a valid value.
                return OrderedDict((arg_name, kwargs[arg_name])
                                   for arg_name in arg_names
                                   if kwargs.get(arg_name))
            sequences_given = only_given(application.sequences)
            contexts_given = only_given(application.contexts)

            # TODO Assumes 1 time dim!
            if len(sequences_given):
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    # Without iteration a sequence argument is a single
                    # step, so its first dimension is the batch.
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {key: value for key, value in kwargs.items()
                           if key not in scan_arguments}
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and
                        not is_shared_variable(value)):
                    warnings.warn(
                        'Your function uses a non-shared variable other than'
                        ' those given by scan explicitly. That can'
                        ' significantly slow down `tensor.grad` call.'
                        ' Did you forget to declare it in `contexts`?')

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        # A numeric initialization scheme: sample one row
                        # and broadcast it over the batch.
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        # An application computes the initial state itself.
                        kwargs[state_name] = \
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs)
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = \
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs)
                    assert kwargs[state_name]
            states_given = only_given(application.states)
            # After the loop above every state must have a value.
            assert len(states_given) == len(application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(state,
                                                        *range(state.ndim))

            # Apply methods
            if not iterate:
                return application_function(brick, **kwargs)

            def scan_function(*args):
                # One scan step: positional args are sequences, then states,
                # then contexts, in that exact order.
                args = list(args)
                arg_names = (list(sequences_given) + list(states_given) +
                             list(contexts_given))
                kwargs = dict(zip(arg_names, args))
                kwargs.update(rest_kwargs)
                return application_function(brick, **kwargs)
            # NOTE(review): this pads with None assuming all state outputs
            # come first in `application.outputs` — verify that ordering.
            outputs_info = (list(states_given.values()) +
                            [None] * (len(application.outputs) -
                                      len(application.states)))
            result, updates = theano.scan(
                scan_function, sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result

        return recurrent_apply

    # Decorator can be used with or without arguments
    assert (args and not kwargs) or (not args and kwargs)
    if args:
        application_function, = args
        return application(recurrent_wrapper(application_function))
    else:
        def wrap_application(application_function):
            return application(**kwargs)(
                recurrent_wrapper(application_function))
        return wrap_application
def recurrent(*args, **kwargs):
    """Wraps an apply method to allow its iterative application.

    This decorator allows you to use implementation of an RNN
    transition to process sequences without writing the
    iteration-related code again and again. In the most general form
    information flow of a recurrent network can be described as
    follows: depending on the context variables and driven by input
    sequences the RNN updates its states and produces output sequences.
    Thus the input variables of your transition function play one of
    three roles: an input, a context or a state. These roles should be
    specified in the method's signature to make iteration possible.

    Parameters
    ----------
    inputs : list of strs
        Names of the arguments of the apply method that play input
        roles.
    states : list of strs
        Names of the arguments of the apply method that play state
        roles.
    contexts : list of strs
        Names of the arguments of the apply method that play context
        roles.
    outputs : list of strs
        Names of the outputs.

    """
    def recurrent_wrapper(application_function):
        arg_spec = inspect.getargspec(application_function)
        arg_names = arg_spec.args[1:]

        @wraps(application_function)
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True`` iteration is made. By default ``True``.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the :func:`theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs, application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs, application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                # The `iterate=False` case returned early above, so any
                # given sequence always carries a leading time dimension
                # followed by the batch dimension. (A dead
                # `if not iterate` branch that read `shape[0]` as the
                # batch size was removed here — it was unreachable.)
                shape = list(sequences_given.values())[0].shape
                n_steps = shape[0]
                batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {
                key: value
                for key, value in kwargs.items() if key not in scan_arguments
            }
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and
                        not is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        # A numeric initialization scheme: sample one row
                        # and broadcast it over the batch.
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        # An application computes the initial state itself.
                        kwargs[state_name] = (kwargs[state_name](state_name,
                                                                 batch_size,
                                                                 *args,
                                                                 **kwargs))
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = (brick.initial_state(
                        state_name, batch_size, *args, **kwargs))
                    # NOTE(review): truth-testing a symbolic variable —
                    # presumably only guards against None; verify it does
                    # not raise with the Theano version in use.
                    assert kwargs[state_name]
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(
                    state, *range(state.ndim))

            def scan_function(*args):
                # One scan step: positional args are sequences, then
                # state-outputs, then contexts, in that exact order.
                args = list(args)
                arg_names = (list(sequences_given) + [
                    output for output in application.outputs
                    if output in application.states
                ] + list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs
            # Only outputs that are also states get an initial value.
            outputs_info = [
                states_given[name] if name in application.states else None
                for name in application.outputs
            ]
            result, updates = theano.scan(
                scan_function,
                sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse,
                name='{}_{}_scan'.format(brick.name,
                                         application.application_name))
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result

        return recurrent_apply

    # Decorator can be used with or without arguments
    assert (args and not kwargs) or (not args and kwargs)
    if args:
        application_function, = args
        return application(recurrent_wrapper(application_function))
    else:
        def wrap_application(application_function):
            return application(**kwargs)(
                recurrent_wrapper(application_function))
        return wrap_application