def __eq__(self, other):
    # Check if we are dealing with the same type of objects
    if not type(self) == type(other):
        return False
    if self.options != other.options:
        return False
    if self.mintaps != other.mintaps:
        return False
    # Check if the number of different types of arguments is the same
    diff_args = ['inputs', 'outputs', 'lengths', 'mintaps', 'switches']
    for arg in diff_args:
        if len(getattr(self, arg)) != len(getattr(other, arg)):
            return False
    for x, y in izip(self.inputs, other.inputs):
        if x.type != y.type:
            return False
    for x, y in izip(self.lengths, other.lengths):
        if x.type != y.type:
            return False
    s_ins = [self.index] + self.inputs + self.lengths + self.switches
    o_ins = [other.index] + other.inputs + other.lengths + other.switches
    givens = dict(izip(s_ins, o_ins))
    # This part might be slow
    for x, y in izip(self.outputs, other.outputs):
        if not gof.graph.is_same_graph(x, y, givens=givens):
            return False
    return True
def p(node, args, outs):
    # copy inputs if not inplace
    if not self.inplace:
        for _, _, val in state_buffers:
            val[0] = val[0].copy()
        for buf in non_numeric_states_bufs:
            buf[0] = buf[0].copy()
    # reset all switches if any
    for sw in self.switches:
        sw.set_value(numpy.int8(0), borrow=True)
    # set aux shared variables
    for var, val in aux_buffers:
        var.set_value(val[0], borrow=True)
    # set state shared variables
    for var, length, val in state_buffers:
        var.set_value(val[0], borrow=True)
        length.set_value(val[0].shape[0], borrow=True)
    self.index.set_value(numpy.int64(0))
    # grab fixed arguments
    fix_args = [x[0] for x in non_tensor_buffers]
    for dx in xrange(node_input_storage[0][0]):
        extra_args = [x[0] for x in non_numeric_states_bufs]
        rvals = self.fn(*(fix_args + extra_args))
        for buf, rval in izip(non_numeric_states_bufs, rvals):
            buf[0] = rval
    for pos in xrange(n_numeric_values):
        buf = state_buffers[pos][0].get_value(borrow=True)
        mintap = self.mintaps[pos]
        node_output_storage[pos][0] = buf
    for out_buf, in_buf in izip(node_output_storage[n_numeric_values:],
                                non_numeric_states_bufs):
        out_buf[0] = in_buf[0]
def infer_shape(self, node, input_shapes):
    for inp, inp_shp in izip(node.inputs, input_shapes):
        assert inp_shp is None or len(inp_shp) == inp.type.ndim
    n_outs = len(self.outputs)
    if self.as_repeatUntil is not None:
        return [(Shape_i(0)(o),) + x[1:]
                for o, x in izip(node.outputs,
                                 input_shapes[1: n_outs + 1])]
    else:
        return input_shapes[1: n_outs + 1]
def map_storage(fgraph, order, input_storage, output_storage):
    """Ensure there is storage (a length-1 list) for inputs, outputs,
    and interior nodes.

    :param fgraph: The current fgraph. This function uses the inputs
        and outputs attributes.
    :param order: an iterable over Apply instances (in program running order)
    :param input_storage: None or existing input storage (see below)
    :param output_storage: None or existing output storage (see below)

    :rtype: 3-tuple
    :returns: (list of storage for inputs, list of storage for outputs,
        and the `storage_map`)

    This function iterates over the nodes in `order` and ensures that for
    every input and output `Variable`, there is a unique storage container.
    This is returned as a dictionary Variable -> storage called the
    `storage_map`.

    This function also returns `input_storage`, which is a list of storages
    corresponding to fgraph.inputs, and `output_storage`, which is a list of
    storages corresponding to fgraph.outputs.
    """
    # each Apply argument's data is stored in a list of length 1
    # (these lists act like pointers)

    # input_storage is a list of data-containers for the inputs.
    if input_storage is None:
        input_storage = [[None] for input in fgraph.inputs]
    else:
        assert len(fgraph.inputs) == len(input_storage)

    storage_map = {}
    for r, storage in izip(fgraph.inputs, input_storage):
        storage_map[r] = storage
    # for orphan in fgraph.orphans:
    #     if not isinstance(orphan, Constant):
    #         raise TypeError("Cannot link a graph with non-constant orphans.",
    #                         orphan)
    #     storage_map[orphan] = [orphan.data]

    if output_storage is not None:
        assert len(fgraph.outputs) == len(output_storage)
        for r, storage in izip(fgraph.outputs, output_storage):
            storage_map[r] = storage

    for node in order:
        for r in node.inputs:
            if r not in storage_map:
                assert isinstance(r, graph.Constant)
                storage_map[r] = [r.data]
        for r in node.outputs:
            storage_map.setdefault(r, [None])
    for r in fgraph.outputs:
        if isinstance(r, graph.Constant):
            storage_map.setdefault(r, [r.data])

    if output_storage is None:
        output_storage = [storage_map[r] for r in fgraph.outputs]

    return input_storage, output_storage, storage_map
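# A minimal usage sketch, not part of the original source: it illustrates the
# storage convention described above, where every value lives in a length-1
# list that acts like a pointer.  The toy graph and the names `x` and `y`
# are assumptions made only for illustration.
import theano
import theano.tensor as tt

x = tt.dscalar('x')
y = x + 1
fgraph = theano.gof.FunctionGraph([x], [y], clone=False)
order = fgraph.toposort()
input_storage, output_storage, storage_map = map_storage(
    fgraph, order, None, None)
# storage_map[x] and input_storage[0] are the same one-element list, so
# writing through one is visible through the other.
storage_map[x][0] = 2.0
assert input_storage[0][0] == 2.0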
def make_thunk(self, node, storage_map, _, _2, impl=None):
    # TODO support broadcast!
    # TODO assert all inputs have the same shape
    fct_name = "pycuda_elemwise_%s" % str(self.scalar_op)
    in_name = ["i" + str(id) for id in range(len(node.inputs))]
    out_name = ["o" + str(id) for id in range(self.nout)]

    c_code = self.scalar_op.c_code(node, "some_name",
                                   tuple([n + "[i]" for n in in_name]),
                                   tuple(n + "[i]" for n in out_name), {})
    c_code_param = ", ".join(
        [_replace_npy_types(var.type.dtype_specs()[1]) + " *" + name
         for var, name in chain(izip(node.inputs, in_name),
                                izip(node.outputs, out_name))] +
        ["int size"])

    mod = SourceModule("""
  __global__ void %s(%s)
  {
    int i = (blockIdx.x+blockIdx.y*gridDim.x)*(blockDim.x*blockDim.y);
    i += threadIdx.x + threadIdx.y*blockDim.x;
    if(i<size){
        %s
    }
  }
  """ % (fct_name, c_code_param, c_code))
    pycuda_fct = mod.get_function(fct_name)
    inputs = [storage_map[v] for v in node.inputs]
    outputs = [storage_map[v] for v in node.outputs]

    def thunk():
        z = outputs[0]
        if (z[0] is None or
                z[0].shape != inputs[0][0].shape or
                not z[0].is_c_contiguous()):
            z[0] = theano.sandbox.cuda.CudaNdarray.zeros(
                inputs[0][0].shape)
        if inputs[0][0].shape != inputs[1][0].shape:
            raise TypeError("PycudaElemwiseSourceModuleMakeThunkOp:"
                            " inputs don't have the same shape!")

        if inputs[0][0].size > 512:
            grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
            block = (512, 1, 1)
        else:
            grid = (1, 1)
            block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1)
        pycuda_fct(inputs[0][0], inputs[1][0], z[0],
                   numpy.intc(inputs[1][0].size), block=block,
                   grid=grid)
    thunk.inputs = inputs
    thunk.outputs = outputs
    thunk.lazy = False

    return thunk
def f():
    for inputs in input_lists[1:]:
        for input1, input2 in izip(inputs0, inputs):
            input2.storage[0] = copy(input1.storage[0])
    for x in to_reset:
        x[0] = None
    pre(self, [input.data for input in input_lists[0]],
        order, thunk_groups)
    for i, (thunks, node) in enumerate(izip(thunk_groups, order)):
        try:
            wrapper(i, node, *thunks)
        except Exception:
            raise_with_op(node, *thunks)
def infer_shape(outs, inputs, input_shapes):
    """
    Compute the shape of the outputs given the shape of the inputs of a
    theano graph.

    We do it this way to avoid compiling the inner function just to get
    the shape. Changes to ShapeFeature could require changes in this
    function.
    """
    # We use a ShapeFeature because it has all the necessary logic
    # inside.  We don't use the full ShapeFeature interface, but we
    # let it initialize itself with an empty fgraph, otherwise we will
    # need to do it manually
    for inp, inp_shp in izip(inputs, input_shapes):
        if inp_shp is not None and len(inp_shp) != inp.ndim:
            assert len(inp_shp) == inp.ndim

    shape_feature = tensor.opt.ShapeFeature()
    shape_feature.on_attach(theano.gof.FunctionGraph([], []))

    # Initialize shape_of with the input shapes
    for inp, inp_shp in izip(inputs, input_shapes):
        shape_feature.set_shape(inp, inp_shp)

    def local_traverse(out):
        """
        Go back in the graph, from out, adding computable shapes to shape_of.
        """
        if out in shape_feature.shape_of:
            # Its shape is already known
            return
        elif out.owner is None:
            # This is an input of the graph
            shape_feature.init_r(out)
        else:
            # Recurse over inputs
            for inp in out.owner.inputs:
                if inp not in shape_feature.shape_of:
                    local_traverse(inp)
            # shape_feature.on_import does not actually use an fgraph.
            # It will call infer_shape and set_shape appropriately.
            dummy_fgraph = None
            shape_feature.on_import(dummy_fgraph, out.owner, reason="dummy")

    ret = []
    for o in outs:
        local_traverse(o)
        ret.append(shape_feature.shape_of[o])
    return ret
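# A minimal usage sketch, not part of the original source: the variable names
# and the toy matrix-product graph are assumptions for illustration only.
# The point is that output shapes are derived symbolically, without compiling
# the graph.
import theano.tensor as tt

n = tt.lscalar('n')
m = tt.lscalar('m')
k = tt.lscalar('k')
a = tt.matrix('a')
b = tt.matrix('b')
out = tt.dot(a, b)
# Each input shape is a tuple of symbolic scalars, one per dimension.
(out_shape,) = infer_shape([out], [a, b], [(n, m), (m, k)])
# out_shape is a tuple of symbolic scalars equivalent to (n, k).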
def grad(self, inputs, output_grads):
    # OpFromGraph doesn't implement a connection_pattern, so for
    # now we regard all inputs and outputs as connected. This will
    # compute the right numerical value for the gradients but
    # could fail to raise the disconnected inputs error in some
    # cases.
    if hasattr(self, "grad_ops"):
        grad_ops = self.grad_ops
    else:
        gs = theano.gradient.grad(
            cost=None,
            known_grads=dict(izip(self.new_outputs, output_grads)),
            wrt=self.new_inputs,
            disconnected_inputs='ignore')

        grad_ops = []
        for g in gs:
            if g is None:
                grad_ops.append(lambda *args: None)
            else:
                # It is normal if some inputs are not needed in order
                # to compute the gradient, so we ignore them.
                grad_ops.append(OpFromGraph(self.new_inputs + output_grads,
                                            [g],
                                            on_unused_input='ignore'))
        self.grad_ops = grad_ops

    return [go(*(inputs + output_grads)) for go in grad_ops]
def make_node(self, c, *args):
    assert len(args) == 2 * self.n_outs, (
        "Wrong number of arguments to make_node: "
        "expected %d, got %d" % (2 * self.n_outs, len(args)))
    if not self.gpu:
        # When gpu is true, we are given only cuda ndarrays, and we want
        # to keep them as cuda ndarrays
        c = theano.tensor.as_tensor_variable(c)
        nw_args = []
        for x in args:
            if isinstance(x, theano.Variable):
                nw_args.append(x)
            else:
                nw_args.append(theano.tensor.as_tensor_variable(x))
        args = nw_args
    ts = args[:self.n_outs]
    fs = args[self.n_outs:]

    for t, f in izip(ts, fs):
        if t.type != f.type:
            raise TypeError(('IfElse requires same types for true and '
                             'false return values'), t, f, t.type, f.type)
    if c.ndim > 0:
        raise TypeError(('Condition given to the op has to be a scalar '
                         'with 0 standing for False, anything else '
                         'for True'))
    return Apply(self, [c] + list(args), [t.type() for t in ts])
def __init__(self, inputs, outputs, **kwargs):
    if not isinstance(outputs, list):
        raise TypeError("outputs must be list", outputs)
    for i in inputs + outputs:
        if not isinstance(i, gof.Variable):
            raise TypeError("inputs and outputs must be Variable instances",
                            i)
    if "updates" in kwargs or "givens" in kwargs:
        raise TypeError("updates and givens are not allowed in kwargs")

    # To correctly support shared variables, the inner function should
    # not see them. Otherwise there is a problem with the gradient.
    self.shared_inputs = [var for var in gof.graph.inputs(outputs)
                          if isinstance(var, SharedVariable)]
    shared_vars = [var.type() for var in self.shared_inputs]
    new = rebuild_collect_shared(
        outputs,
        inputs=inputs + shared_vars,
        replace=dict(izip(self.shared_inputs, shared_vars)),
        copy_inputs_over=False,
    )
    (new_inputs, new_outputs,
     [clone_d, update_d, update_expr, shared_inputs]) = new
    assert len(new_inputs) == len(inputs) + len(self.shared_inputs)
    assert len(new_outputs) == len(outputs)
    assert not update_d
    assert not update_expr
    assert not shared_inputs

    self.new_inputs = new_inputs
    self.new_outputs = new_outputs
    self.inputs = inputs
    self.outputs = outputs
    self.kwargs = kwargs
    self.input_types = [input.type for input in inputs]
    self.output_types = [output.type for output in outputs]
def perform(self, node, inputs, outputs):
    variables = self.fn(*inputs)
    assert len(variables) == len(outputs)
    for output, variable in izip(outputs, variables):
        # TODO: when function's output-borrowing semantics are correct,
        # we won't need this copy anymore
        output[0] = variable.copy()
def shape_of_variables(fgraph, input_shapes):
    """
    Compute the numeric shape of all intermediate variables given input
    shapes.

    Parameters
    ----------
    fgraph
        The theano.FunctionGraph in question.
    input_shapes : dict
        A dict mapping input to shape.

    Returns
    -------
    shapes : dict
        A dict mapping variable to shape

    .. warning:: This modifies the fgraph. Not pure.

    Examples
    --------
    >>> import theano
    >>> x = theano.tensor.matrix('x')
    >>> y = x[512:]; y.name = 'y'
    >>> fgraph = theano.FunctionGraph([x], [y], clone=False)
    >>> d = shape_of_variables(fgraph, {x: (1024, 1024)})
    >>> d[y]
    (array(512), array(1024))
    >>> d[x]
    (array(1024), array(1024))
    """
    if not hasattr(fgraph, 'shape_feature'):
        fgraph.attach_feature(theano.tensor.opt.ShapeFeature())

    input_dims = [dimension for inp in fgraph.inputs
                  for dimension in fgraph.shape_feature.shape_of[inp]]

    output_dims = [dimension
                   for shape in fgraph.shape_feature.shape_of.values()
                   for dimension in shape]

    compute_shapes = theano.function(input_dims, output_dims)

    if any([i not in fgraph.inputs for i in input_shapes.keys()]):
        raise ValueError(
            "input_shapes keys aren't in the fgraph.inputs. FunctionGraph()"
            " interface changed. Now by default, it clones the graph it"
            " receives. To have the old behavior, give it this new parameter"
            " `clone=False`.")

    numeric_input_dims = [dim for inp in fgraph.inputs
                          for dim in input_shapes[inp]]
    numeric_output_dims = compute_shapes(*numeric_input_dims)

    sym_to_num_dict = dict(izip(output_dims, numeric_output_dims))

    l = {}
    for var in fgraph.shape_feature.shape_of:
        l[var] = tuple(sym_to_num_dict[sym]
                       for sym in fgraph.shape_feature.shape_of[var])
    return l
def __init__(self, inputs, outputs, inline=False,
             lop_overrides='default', grad_overrides='default',
             rop_overrides='default', name=None, **kwargs):
    if not isinstance(outputs, list):
        raise TypeError('outputs must be list, got %s' % type(outputs))
    for i in inputs + outputs:
        if not isinstance(i, gof.Variable):
            raise TypeError(
                'inputs and outputs must be Variable instances', i)
    if 'updates' in kwargs or 'givens' in kwargs:
        raise TypeError('updates and givens are not allowed here')
    self.is_inline = inline
    # To correctly support shared variables the inner function should
    # not see them. Otherwise there is a problem with the gradient.
    self.shared_inputs = [var for var in gof.graph.inputs(outputs)
                          if isinstance(var, SharedVariable)]
    shared_vars = [var.type() for var in self.shared_inputs]

    new = rebuild_collect_shared(outputs, inputs=inputs + shared_vars,
                                 replace=dict(izip(self.shared_inputs,
                                                   shared_vars)),
                                 copy_inputs_over=False)
    (local_inputs, local_outputs,
     [clone_d, update_d, update_expr, shared_inputs]) = new
    assert len(local_inputs) == len(inputs) + len(self.shared_inputs)
    assert len(local_outputs) == len(outputs)
    assert not update_d
    assert not update_expr
    assert not shared_inputs

    self.local_inputs = local_inputs
    self.local_outputs = local_outputs
    self.inputs = inputs
    self.outputs = outputs
    self.kwargs = kwargs
    self.input_types = [inp.type for inp in inputs]
    self.output_types = [out.type for out in outputs]

    if lop_overrides != 'default':
        if grad_overrides != 'default':
            raise ValueError(
                'lop_overrides and grad_overrides are mutually exclusive')
        else:
            self.set_lop_overrides(lop_overrides)
            self._lop_type = 'lop'
    elif grad_overrides != 'default':
        self.set_lop_overrides(grad_overrides)
        self._lop_type = 'grad'
    else:
        self.set_lop_overrides('default')
        self._lop_type = 'lop'
    self.set_rop_overrides(rop_overrides)

    if name is not None:
        assert isinstance(name, str), 'name must be None or string object'
    self.name = name
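# A minimal usage sketch, not part of the original source: it mirrors the
# standard OpFromGraph example from the Theano documentation.  The names
# x, y, z and the expression are assumptions for illustration.
import theano
import theano.tensor as tt

x, y, z = tt.scalars('x', 'y', 'z')
e = x + y * z
op = OpFromGraph([x, y, z], [e])
# The new Op behaves like any other Op and can be applied to fresh inputs.
e2 = op(x, y, z) + op(z, y, x)
fn = theano.function([x, y, z], e2)
# fn(1., 2., 3.) -> array(12.0), i.e. (1 + 2*3) + (3 + 2*1)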
def streamline_nice_errors_f():
    for x in no_recycling:
        x[0] = None
    try:
        for thunk, node in izip(thunks, order):
            thunk()
    except Exception:
        raise_with_op(node, thunk)
def test_known_grads():
    # Tests that the grad method with no known_grads
    # matches what happens if you put its own known_grads
    # in for each variable

    full_range = theano.tensor.arange(10)
    x = theano.tensor.scalar('x')
    t = theano.tensor.iscalar('t')
    ft = full_range[t]
    ft.name = 'ft'
    coeffs = theano.tensor.vector('c')
    ct = coeffs[t]
    ct.name = 'ct'
    p = x ** ft
    p.name = 'p'
    y = ct * p
    y.name = 'y'
    cost = theano.tensor.sqr(y)
    cost.name = 'cost'

    layers = [
        [cost],
        [y],
        [ct, p],
        [ct, x, ft],
        [coeffs, t, full_range, x]
    ]

    inputs = [coeffs, t, x]

    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(10), rng.randint(10), rng.randn()]
    values = [np.cast[ipt.dtype](value)
              for ipt, value in zip(inputs, values)]

    true_grads = theano.tensor.grad(cost, inputs,
                                    disconnected_inputs='ignore')
    true_grads = theano.function(inputs, true_grads)
    true_grads = true_grads(*values)

    for layer in layers:
        print('Testing by separately computing ', layer)
        first = theano.tensor.grad(cost, layer, disconnected_inputs='ignore')
        known = dict(izip(layer, first))
        full = theano.tensor.grad(cost=None, known_grads=known,
                                  wrt=inputs, disconnected_inputs='ignore')
        full = theano.function(inputs, full)
        full = full(*values)
        assert len(true_grads) == len(full)
        for a, b, var in zip(true_grads, full, inputs):
            if not np.allclose(a, b):
                print('Failure')
                print(a)
                print(b)
                print(var)
                print(layer)
                for v in known:
                    print(v, ':', theano.function(inputs, known[v])(*values))
                assert False
def R_op(self, inputs, eval_points):
    if not self._rop_op_is_cached:
        self._recompute_rop_op()
    ret_ofg_l = self._rop_op(
        *(list(inputs) + list(eval_points)), return_list=True)
    ret_l = [ret_ofg if ov is None else ov
             for ret_ofg, ov in izip(ret_ofg_l, self._rop_op_stypes_l)]
    return ret_l
def grad(self, inputs, output_grads):
    if not self._grad_op_is_cached:
        self._recompute_grad_op()
    ret_ofg_l = self._grad_op(
        *(list(inputs) + list(output_grads)), return_list=True)
    ret_l = [ret_ofg if ov is None else ov
             for ret_ofg, ov in izip(ret_ofg_l, self._grad_op_stypes_l)]
    return ret_l
def grad_sources_inputs(sources, inputs):
    """
    This implements the old grad_sources_inputs function in terms of
    the new interface so the tests don't need to be rewritten.
    """
    if inputs is None:
        inputs = theano.gof.graph.inputs([source[0] for source in sources])
    return dict(izip(inputs, theano.gradient.grad(
        cost=None,
        known_grads=dict(sources),
        wrt=inputs,
        consider_constant=inputs)))
def make_node(self, *inputs):
    _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
    if self.nin > 0 and len(_inputs) != self.nin:
        raise TypeError("Wrong argument count", (self.nin, len(_inputs)))
    for i in _inputs[1:]:
        if i.type.ndim != inputs[0].type.ndim:
            raise TypeError("different ranks among inputs")

    if any([any(i.type.broadcastable) for i in inputs]):
        raise Exception("pycuda does not support broadcasted dimensions")
    assert len(inputs) == 2  # TODO remove

    otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
    assert self.nout == 1

    fct_name = "pycuda_elemwise_%s" % str(self.scalar_op)
    out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
    in_name = ["i" + str(id) for id in range(len(inputs))]
    out_name = ["o" + str(id) for id in range(self.nout)]
    c_code = self.scalar_op.c_code(out_node, "some_name",
                                   tuple([n + "[i]" for n in in_name]),
                                   tuple(n + "[i]" for n in out_name), {})
    c_code_param = ", ".join(
        [_replace_npy_types(var.type.dtype_specs()[1]) + " *" + name
         for var, name in chain(izip(inputs, in_name),
                                izip(out_node.outputs, out_name))] +
        ["int size"])
    mod = SourceModule("""
  __global__ void %s(%s)
  {
    int i = (blockIdx.x+blockIdx.y*gridDim.x)*(blockDim.x*blockDim.y);
    i += threadIdx.x + threadIdx.y*blockDim.x;
    if(i<size){
        %s
    }
  }
  """ % (fct_name, c_code_param, c_code))
    self.pycuda_fct = mod.get_function(fct_name)
    return out_node
def streamline_default_f():
    for x in no_recycling:
        x[0] = None
    try:
        for thunk, node, old_storage in izip(thunks, order,
                                             post_thunk_old_storage):
            thunk()
            for old_s in old_storage:
                old_s[0] = None
    except Exception:
        raise_with_op(node, thunk)
def p(node, args, outs):
    pos = 0
    cont = 1
    # copy inputs if not inplace
    if not self.inplace:
        for _, _, val in state_buffers:
            val[0] = val[0].copy()
        for buf in non_numeric_states_bufs:
            buf[0] = buf[0].copy()
    # reset all switches if any
    for sw in self.switches:
        sw.set_value(numpy.int8(0), borrow=True)
    # set aux shared variables
    for var, val in aux_buffers:
        var.set_value(val[0], borrow=True)
    # set state shared variables
    for var, length, val in state_buffers:
        var.set_value(val[0], borrow=True)
        length.set_value(val[0].shape[0], borrow=True)
    self.index.set_value(numpy.int64(0))
    # grab fixed arguments
    fix_args = [x[0] for x in non_tensor_buffers]
    while cont and pos < node_input_storage[0][0]:
        extra_args = [x[0] for x in non_numeric_states_bufs]
        rvals = self.fn(*(fix_args + extra_args))
        for buf, rval in izip(non_numeric_states_bufs, rvals):
            buf[0] = rval
        cont = rvals[-1]
        pos = pos + 1
    # We need to trim the outputs if they are longer
    for pos in xrange(n_numeric_values):
        buf = state_buffers[pos][2][0]
        mintap = self.mintaps[pos]
        if buf.shape[0] > pos + self.mintaps[pos]:
            node_output_storage[pos][0] = buf[:pos + mintap]
        else:
            node_output_storage[pos][0] = buf
    for out_buf, in_buf in izip(node_output_storage[n_numeric_values:],
                                non_numeric_states_bufs):
        out_buf[0] = in_buf[0]
def make_node(self, *inputs):
    num_expected_inps = len(self.local_inputs) - len(self.shared_inputs)
    if len(inputs) != num_expected_inps:
        raise ValueError("Expected %d inputs, got %d" %
                         (num_expected_inps, len(inputs)))
    inputs = [inp_t.filter_variable(inp)
              for inp, inp_t in izip(inputs, self.input_types)]
    apply_node = gof.Apply(self,
                           list(inputs) + self.shared_inputs,
                           [type() for type in self.output_types])
    apply_node.local_inputs = self.local_inputs
    apply_node.local_outputs = self.local_outputs
    return apply_node
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
             n_train=100):

    if config.mode == 'DEBUG_MODE':
        n_train = 1

    if use_gpu:
        w = tcn.shared_constructor(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = tcn.shared_constructor(my_zeros(n_hid), 'b')
        v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
        c = tcn.shared_constructor(my_zeros(n_out), 'c')
    else:
        w = shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
        b = shared(my_zeros(n_hid), 'b')
        v = shared(my_zeros((n_hid, n_out)), 'c')
        c = shared(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)
    if 0:
        print('loss type', loss.type)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    # print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    if 0:
        for i, n in enumerate(train.maker.fgraph.toposort()):
            print(i, n)

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    lr = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = []
    for i in xrange(n_train):
        rval.append(train(xval, yval, lr))
    dt = time.time() - t0

    print_mode(mode)
    return numpy.asarray(rval), dt
def thunk():
    if not compute_map[cond][0]:
        return [0]
    else:
        truthval = storage_map[cond][0]
        if truthval != 0:
            ls = [idx + 1 for idx in xrange(self.n_outs)
                  if not compute_map[ts[idx]][0]]
            if len(ls) > 0:
                return ls
            else:
                for out, t in izip(outputs, ts):
                    compute_map[out][0] = 1
                    val = storage_map[t][0]
                    if self.as_view:
                        storage_map[out][0] = val
                    # Work around broken numpy deepcopy
                    elif type(val) in (numpy.ndarray, numpy.memmap):
                        storage_map[out][0] = val.copy()
                    else:
                        storage_map[out][0] = deepcopy(val)
                return []
        else:
            ls = [1 + idx + self.n_outs for idx in xrange(self.n_outs)
                  if not compute_map[fs[idx]][0]]
            if len(ls) > 0:
                return ls
            else:
                for out, f in izip(outputs, fs):
                    compute_map[out][0] = 1
                    # can't view both outputs unless destroyhandler
                    # improves
                    # Work around broken numpy deepcopy
                    val = storage_map[f][0]
                    if type(val) in (numpy.ndarray, numpy.memmap):
                        storage_map[out][0] = val.copy()
                    else:
                        storage_map[out][0] = deepcopy(val)
                return []
def test_known_grads():
    # Tests that the grad method with no known_grads
    # matches what happens if you put its own known_grads
    # in for each variable

    full_range = theano.tensor.arange(10)
    x = theano.tensor.scalar("x")
    t = theano.tensor.iscalar("t")
    ft = full_range[t]
    ft.name = "ft"
    coeffs = theano.tensor.vector("c")
    ct = coeffs[t]
    ct.name = "ct"
    p = x ** ft
    p.name = "p"
    y = ct * p
    y.name = "y"
    cost = theano.tensor.sqr(y)
    cost.name = "cost"

    layers = [[cost], [y], [ct, p], [ct, x, ft], [coeffs, t, full_range, x]]

    inputs = [coeffs, t, x]

    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(10), rng.randint(10), rng.randn()]
    values = [np.cast[ipt.dtype](value)
              for ipt, value in zip(inputs, values)]

    true_grads = theano.tensor.grad(cost, inputs,
                                    disconnected_inputs="ignore")
    true_grads = theano.function(inputs, true_grads)
    true_grads = true_grads(*values)

    for layer in layers:
        first = theano.tensor.grad(cost, layer, disconnected_inputs="ignore")
        known = OrderedDict(izip(layer, first))
        full = theano.tensor.grad(cost=None, known_grads=known,
                                  wrt=inputs, disconnected_inputs="ignore")
        full = theano.function(inputs, full)
        full = full(*values)
        assert len(true_grads) == len(full)
        for a, b, var in zip(true_grads, full, inputs):
            if not np.allclose(a, b):
                print("Failure")
                print(a)
                print(b)
                print(var)
                print(layer)
                for v in known:
                    print(v, ":", theano.function(inputs, known[v])(*values))
                assert False
def execute(*args):
    def e_arity(takes, got):
        return 'Function call takes exactly %i %s (%i given)' % (
            takes, ['argument', 'arguments'][takes > 1], got)
    if len(args) != len(inputs):
        raise TypeError(e_arity(len(inputs), len(args)))
    for arg, variable in izip(args, inputs):
        variable.data = arg
    thunk()
    if unpack_single:
        return utils.to_return_values([variable.data
                                       for variable in outputs])
    else:
        return [variable.data for variable in outputs]
def inline_ofg_expansion(node):
    """
    This optimization expands the internal graph of an OpFromGraph node.
    It is only performed if node.op.is_inline == True.
    Doing so can improve optimization at the cost of compilation speed.
    """
    op = node.op
    if not isinstance(op, OpFromGraph):
        return False
    if not op.is_inline:
        return False
    return theano.clone(
        op.local_outputs,
        {u: v for u, v in izip(node.op.local_inputs, node.inputs)})
def make_thunk(self, **kwargs):
    no_recycling = self.no_recycling

    make_all = [self.linkers[0].make_all(**kwargs)]
    kwargs.pop('input_storage', None)
    make_all += [l.make_all(**kwargs) for l in self.linkers[1:]]

    fns, input_lists, output_lists, thunk_lists, order_lists \
        = zip(*make_all)

    order_list0 = order_lists[0]
    for order_list in order_lists[1:]:
        if not order_list0 == order_list:
            raise Exception("All linkers to WrapLinker should execute "
                            "operations in the same order.")

    inputs0 = input_lists[0]
    outputs0 = output_lists[0]

    thunk_groups = list(zip(*thunk_lists))
    order = [x[0] for x in zip(*order_lists)]

    to_reset = []
    for thunks, node in izip(thunk_groups, order):
        for j, output in enumerate(node.outputs):
            if output in no_recycling:
                for thunk in thunks:
                    to_reset.append(thunk.outputs[j])

    wrapper = self.wrapper
    pre = self.pre

    def f():
        for inputs in input_lists[1:]:
            for input1, input2 in izip(inputs0, inputs):
                input2.storage[0] = copy(input1.storage[0])
        for x in to_reset:
            x[0] = None
        pre(self, [input.data for input in input_lists[0]],
            order, thunk_groups)
        for i, (thunks, node) in enumerate(izip(thunk_groups, order)):
            try:
                wrapper(i, node, *thunks)
            except Exception:
                raise_with_op(node, *thunks)
    f.thunk_groups = thunk_groups

    return f, inputs0, outputs0
def Lop(f, wrt, eval_points, consider_constant=None,
        disconnected_inputs='raise'):
    """
    This Lop() has the same functionality as Theano's Lop().
    """
    if type(eval_points) not in (list, tuple):
        eval_points = [eval_points]

    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)

    if not isinstance(f, (list, tuple)):
        f = [f]

    # make copies of f and grads so we don't modify the client's copy
    f = list(f)
    grads = list(eval_points)

    # var_grads = []
    # for grad in grads:
    #     var_grad = theano.shared(grad, name="let me see",
    #                              allow_downcast=True)
    #     var_grads += [var_grad]

    if not isinstance(wrt, (list, tuple)):
        wrt = [wrt]

    assert len(f) == len(grads)
    known = dict(izip(f, grads))

    ret = T.grad(cost=None, known_grads=known,
                 consider_constant=consider_constant, wrt=wrt,
                 disconnected_inputs=disconnected_inputs)

    return format_as(using_list, using_tuple, ret)
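# A minimal usage sketch, not part of the original source: the variable names
# are assumptions.  The L-operator computes v^T J, the product of a row
# vector with the Jacobian of f with respect to wrt, which is exactly what
# grad with known_grads does above.
import theano
import theano.tensor as T

x = T.dvector('x')
W = T.dmatrix('W')
f_expr = T.dot(x, W)      # the Jacobian of f_expr w.r.t. x is W^T
v = T.dvector('v')        # the row vector multiplying the Jacobian
vJ = Lop(f_expr, wrt=x, eval_points=v)
# The result is linear in x, so it only depends on W and v.
fn = theano.function([W, v], vJ)
# For f = x.W, v^T (df/dx) equals numpy.dot(W, v).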
def test_subgraph_grad():
    # Tests that the grad method with no known_grads
    # matches what happens if you use successive subgraph_grads

    x = theano.tensor.fvector('x')
    t = theano.tensor.fvector('t')
    w1 = theano.shared(np.random.randn(3, 4))
    w2 = theano.shared(np.random.randn(4, 2))
    a1 = theano.tensor.tanh(theano.tensor.dot(x, w1))
    a2 = theano.tensor.tanh(theano.tensor.dot(a1, w2))
    cost2 = theano.tensor.sqr(a2 - t).sum()
    cost2 += theano.tensor.sqr(w2.sum())
    cost1 = theano.tensor.sqr(w1.sum())

    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(2), rng.randn(3)]
    values = [np.cast[ipt.dtype](value)
              for ipt, value in zip(inputs, values)]

    wrt = [w2, w1]
    cost = cost2 + cost1
    true_grads = theano.grad(cost, wrt)
    true_grads = theano.function(inputs, true_grads)
    true_grads = true_grads(*values)

    next_grad = None
    param_grads = []
    for i in xrange(2):
        param_grad, next_grad = theano.subgraph_grad(
            wrt=params[i], end=grad_ends[i],
            start=next_grad, cost=costs[i]
        )
        next_grad = OrderedDict(izip(grad_ends[i], next_grad))
        param_grads.extend(param_grad)

    pgrads = theano.function(inputs, param_grads)
    pgrads = pgrads(*values)

    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001
def reconstruct_graph(inputs, outputs, tag=None):
    """
    Different interface to clone, that allows you to pass inputs.
    Compared to clone, this method always replaces the inputs with
    new variables of the same type, and returns those (in the same
    order as the original inputs).
    """
    if tag is None:
        tag = ''
    nw_inputs = [safe_new(x, tag) for x in inputs]
    givens = OrderedDict()
    for nw_x, x in izip(nw_inputs, inputs):
        givens[x] = nw_x
    allinputs = theano.gof.graph.inputs(outputs)
    for inp in allinputs:
        if isinstance(inp, theano.Constant):
            givens[inp] = inp.clone()

    nw_outputs = clone(outputs, replace=givens)
    return (nw_inputs, nw_outputs)
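# A minimal usage sketch, not part of the original source: the variable names
# are assumptions.  reconstruct_graph hands back fresh inputs of the same
# types (in the same order) plus outputs rebuilt on top of them, leaving the
# original graph untouched.
import theano.tensor as tt

x = tt.vector('x')
y = tt.vector('y')
out = x * 2 + y
(new_x, new_y), (new_out,) = reconstruct_graph([x, y], [out], tag='_copy')
assert new_x is not x and new_y is not y
assert new_x.type == x.type  # same type, new variable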