def broadcast_like(value, template, fgraph, dtype=None):
    """
    Return a Variable with the same shape and dtype as the template,
    filled by broadcasting value through it.  `value` will be cast as
    necessary.

    """
    value = T.as_tensor_variable(value)
    if value.type == template.type:
        return value
    if template not in fgraph.variables:
        raise NotImplementedError('broadcast_like currently requires the '
                                  'template Variable to be in the fgraph already')
    if hasattr(fgraph, 'shape_feature'):
        new_shape = fgraph.shape_feature.shape_of[template]
    else:
        new_shape = template.shape
    if dtype is None:
        dtype = template.dtype
    rval = T.alloc(T.cast(value, dtype), *new_shape)
    # the template may have 1s in its shape without being broadcastable
    if rval.broadcastable != template.broadcastable:
        rval = T.unbroadcast(rval, *[i for i in range(rval.ndim)
                                     if rval.broadcastable[i] and
                                     not template.broadcastable[i]])
    assert rval.type.dtype == dtype

    if rval.type.broadcastable != template.broadcastable:
        raise AssertionError("rval.type.broadcastable is " +
                             str(rval.type.broadcastable) +
                             " but template.broadcastable is " +
                             str(template.broadcastable))

    return rval
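# Illustrative sketch only (not part of the function above): a dense-NumPy
# analogue of broadcast_like, assuming a concrete template array. The result
# takes the template's shape and dtype, with the value cast and broadcast
# through it.
import numpy as np

_template = np.zeros((2, 3), dtype="float32")
_filled = np.broadcast_to(np.float32(0.5), _template.shape)
assert _filled.shape == _template.shape and _filled.dtype == _template.dtype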
def grad(self, inp, cost_grad):
    """
    Notes
    -----
    The gradient is currently implemented for matrices only.

    """
    a, val, offset = inp
    grad = cost_grad[0]
    height, width = grad.shape

    if a.dtype.startswith("complex"):
        # The gradient is undefined for complex inputs; return one entry
        # per input (a, val, offset).
        return [None, None, None]

    # only valid for matrices
    wr_a = fill_diagonal_offset(grad, 0, offset)

    offset_abs = basic.abs_(offset)
    pos_offset_flag = basic.ge(offset, 0)
    neg_offset_flag = basic.lt(offset, 0)
    min_wh = basic.minimum(width, height)

    start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
    num_of_step = basic.minimum(
        min_wh, width * pos_offset_flag + height * neg_offset_flag - offset_abs
    )

    step = a.shape[1] + 1
    end = start + step * num_of_step

    # input of slice should be integer
    start = basic.cast(start, "int32")
    step = basic.cast(step, "int32")
    end = basic.cast(end, "int32")

    wr_val = grad.flatten()[start:end:step].sum()

    wr_offset = grad_undefined(
        self,
        2,
        offset,
        "offset is not defined for non-integer offset so"
        " fill_diagonal_offset(a,val,offset+eps) is undefined",
    )

    return [wr_a, wr_val, wr_offset]
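# Worked check of the index arithmetic above (illustrative, NumPy only): for a
# 3x4 gradient with offset=1, the positive-offset branch gives start=1,
# step=width+1=5 and num_of_step=min(3, 4-1)=3, so the strided slice of the
# flattened gradient picks out exactly the offset-1 diagonal.
import numpy as np

_g = np.arange(12).reshape(3, 4)
_height, _width, _offset = 3, 4, 1
_start = _offset
_step = _width + 1
_num = min(min(_height, _width), _width - _offset)
_end = _start + _step * _num
assert np.array_equal(_g.flatten()[_start:_end:_step], np.diagonal(_g, offset=1))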
def local_abstract_batch_norm_train(fgraph, node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if (
        not isinstance(x.type, TensorType)
        or not isinstance(scale.type, TensorType)
        or not isinstance(bias.type, TensorType)
        or not isinstance(epsilon.type, TensorType)
        or not isinstance(running_average_factor.type, TensorType)
    ):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")
    invstd = tt.inv(tt.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = (
            running_mean * (1.0 - running_average_factor)
            + mean * running_average_factor
        )
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = (
            running_var * (1.0 - running_average_factor)
            + (m / (m - 1)) * var * running_average_factor
        )
        results.append(running_var)

    results = [
        tt.patternbroadcast(r, r_orig.broadcastable)
        for (r, r_orig) in zip(results, node.outputs)
    ]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
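# Dense-NumPy sketch of the train-mode normalization built above (illustrative;
# assumes axes=(0,) and a concrete epsilon): out = (x - mean) * (scale * invstd) + bias,
# so the per-feature mean of `out` equals `bias`.
import numpy as np

_x = np.random.randn(8, 4).astype("float32")
_scale = np.full((1, 4), 2.0, dtype="float32")
_bias = np.zeros((1, 4), dtype="float32")
_eps = np.float32(1e-5)
_mean = _x.mean(axis=0, keepdims=True)
_invstd = 1.0 / np.sqrt(_x.var(axis=0, keepdims=True) + _eps)
_out = (_x - _mean) * (_scale * _invstd) + _bias
assert np.allclose(_out.mean(axis=0), _bias, atol=1e-4)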
def grad(self, inp, cost_grad):
    """
    Notes
    -----
    The gradient is currently implemented for matrices only.

    """
    a, val, offset = inp
    grad = cost_grad[0]
    height, width = grad.shape

    if a.dtype.startswith("complex"):
        # The gradient is undefined for complex inputs; return one entry
        # per input (a, val, offset).
        return [None, None, None]

    # only valid for matrices
    wr_a = fill_diagonal_offset(grad, 0, offset)

    offset_abs = basic.abs_(offset)
    pos_offset_flag = basic.ge(offset, 0)
    neg_offset_flag = basic.lt(offset, 0)
    min_wh = basic.minimum(width, height)

    start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
    num_of_step = basic.minimum(min_wh,
                                width * pos_offset_flag +
                                height * neg_offset_flag - offset_abs)

    step = a.shape[1] + 1
    end = start + step * num_of_step

    # input of slice should be integer
    start = basic.cast(start, "int32")
    step = basic.cast(step, "int32")
    end = basic.cast(end, "int32")

    wr_val = grad.flatten()[start:end:step].sum()

    wr_offset = theano.gradient.grad_undefined(
        self, 2, offset,
        "offset is not defined for non-integer offset so"
        " fill_diagonal_offset(a,val,offset+eps) is undefined")

    return [wr_a, wr_val, wr_offset]
def sparse_mean(x, axis=None):
    """Mean of a tensor, alongside the specified axis.
    """
    # bool is available since theano v0.9dev
    if 'int' in x.dtype or x.dtype == 'bool':
        dtype = floatx()
    else:
        dtype = x.dtype

    if isinstance(axis, (integer_types, np.integer)):
        if axis == -1:
            axis = max(x.ndim - 1, 0)

    s = th_sparse_module.sp_sum(x, axis, True)
    shp = shape(x)
    if s.dtype in ('float16', 'float32', 'complex64'):
        shp = cast(shp, 'float32')
    else:
        shp = cast(shp, 'float64')

    if axis is None:
        axis = list(range(len(x.data.shape)))
    elif isinstance(axis, (integer_types, np.integer)):
        axis = [axis]
    elif isinstance(axis, np.ndarray) and axis.ndim == 0:
        axis = [int(axis)]
    else:
        axis = [int(a) for a in axis]

    for i in axis:
        s = true_div(s, shp[i])

    if s.dtype != shp.dtype and s.dtype in discrete_dtypes:
        s = cast(s, shp.dtype)

    if dtype == 'float16' or (dtype is None and x.dtype == 'float16'):
        s = cast(s, 'float16')
    s.name = 'mean'
    return s
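# Plain-NumPy sanity check of the reduction above (illustrative, dense array):
# summing over the requested axes and dividing by each reduced dimension in
# turn is the same as taking the mean over those axes.
import numpy as np

_x = np.arange(12, dtype="float32").reshape(3, 4)
_axes = [0, 1]
_s = _x.sum(axis=tuple(_axes), keepdims=True)
for _i in _axes:
    _s = _s / _x.shape[_i]
assert np.allclose(_s, _x.mean(axis=tuple(_axes), keepdims=True))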
def make_node(self, a, val):
    a = basic.as_tensor_variable(a)
    val = basic.as_tensor_variable(val)
    if a.ndim < 2:
        raise TypeError("%s: first parameter must have at least"
                        " two dimensions" % self.__class__.__name__)
    elif val.ndim != 0:
        raise TypeError("%s: second parameter must be a scalar"
                        % self.__class__.__name__)
    val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError("%s: type of second parameter must be the same as"
                        " the first's" % self.__class__.__name__)
    return Apply(self, [a, val], [a.type()])
def local_abstract_batch_norm_train(node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if not isinstance(x.type, TensorType) or \
       not isinstance(scale.type, TensorType) or \
       not isinstance(bias.type, TensorType) or \
       not isinstance(epsilon.type, TensorType) or \
       not isinstance(running_average_factor.type, TensorType):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == 'float32' and epsilon.dtype == 'float64':
        epsilon = epsilon.astype('float32')
    invstd = T.inv(T.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = running_mean * (1.0 - running_average_factor) + \
            mean * running_average_factor
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = running_var * (1.0 - running_average_factor) + \
            (m / (m - 1)) * var * running_average_factor
        results.append(running_var)

    results = [T.patternbroadcast(r, r_orig.broadcastable)
               for (r, r_orig) in zip(results, node.outputs)]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
def make_node(self, a, val, offset):
    a = basic.as_tensor_variable(a)
    val = basic.as_tensor_variable(val)
    offset = basic.as_tensor_variable(offset)
    if a.ndim != 2:
        raise TypeError("%s: first parameter must have exactly"
                        " two dimensions" % self.__class__.__name__)
    elif val.ndim != 0:
        raise TypeError("%s: second parameter must be a scalar"
                        % self.__class__.__name__)
    elif offset.ndim != 0:
        raise TypeError("%s: third parameter must be a scalar"
                        % self.__class__.__name__)
    val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError("%s: type of second parameter must be the same"
                        " as the first's" % self.__class__.__name__)
    elif offset.dtype not in theano.tensor.integer_dtypes:
        raise TypeError("%s: type of third parameter must be an integer;"
                        " use theano.tensor.cast(input, 'int32/int64')"
                        % self.__class__.__name__)
    return Apply(self, [a, val, offset], [a.type()])
def make_node(self, rng, size, dtype, *dist_params):
    """Create a random variable node.

    XXX: Unnamed/non-keyword arguments are considered distribution
    parameters!  If you want to set `size`, `rng`, and/or `name`, use their
    keywords.

    Parameters
    ----------
    rng: RandomStateType
        Existing Theano `RandomState` object to be used.  Creates a new
        one, if `None`.
    size: int or Sequence
        Numpy-like size of the output (i.e. replications).
    dtype: Theano dtype
        The dtype of the sampled output.  This value is only used when
        `self.dtype` isn't set.
    dist_params: list
        Distribution parameters.

    Returns
    -------
    out: `Apply`
        A node with inputs `(rng, size, dtype) + dist_args` and outputs
        `(rng_var, out_var)`.

    """
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

    assert size.dtype in int_dtypes

    dist_params = tuple(
        as_tensor_variable(p) if not isinstance(p, Variable) else p
        for p in dist_params
    )

    if rng is None:
        rng = theano.shared(np.random.RandomState())
    elif not isinstance(rng.type, RandomStateType):
        raise TypeError("The type of rng should be an instance of RandomStateType")

    bcast = self.compute_bcast(dist_params, size)
    dtype = self.dtype or dtype

    if dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
        # dtype = tt.scal.upcast(self.dtype, *[p.dtype for p in dist_params])
        raise TypeError("dtype is unspecified")

    if isinstance(dtype, str):
        dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
    else:
        dtype_idx = constant(dtype, dtype="int64")
        dtype = all_dtypes[dtype_idx.data]

    outtype = TensorType(dtype=dtype, broadcastable=bcast)
    out_var = outtype()
    inputs = (rng, size, dtype_idx) + dist_params
    outputs = (rng.type(), out_var)

    return Apply(self, inputs, outputs)
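# Illustrative, plain-Python restatement of the `size` normalization above
# (the helper name is hypothetical): None becomes an empty int64 vector, an
# int becomes a length-1 vector, and any other sequence becomes an int64 vector.
import numpy as np

def _normalize_size(size):
    if size is None:
        return np.array([], dtype="int64")
    if isinstance(size, int):
        return np.array([size], dtype="int64")
    return np.asarray(size, dtype="int64")

assert _normalize_size(None).shape == (0,)
assert _normalize_size(3).tolist() == [3]
assert _normalize_size((2, 4)).tolist() == [2, 4]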
def infer_shape(self, fgraph, node, in_shapes):
    temp = node.inputs[0]
    M = basic.switch(basic.lt(temp, 0),
                     basic.cast(0, temp.dtype),
                     temp)
    return [[M]]