Example #1
    def test_multiple_out_grad(self):
        # Tests that we can compute the gradients through lazy if
        x1 = tensor.vector('x1')
        x2 = tensor.vector('x2')
        y1 = tensor.vector('y1')
        y2 = tensor.vector('y2')
        c = tensor.iscalar('c')
        z = ifelse(c, (x1, x2), (y1, y2))
        grads = tensor.grad(z[0].sum() + z[1].sum(),
                            [x1, x2, y1, y2])

        f = theano.function([c, x1, x2, y1, y2], grads)
        rng = numpy.random.RandomState(utt.fetch_seed())

        lens = [rng.randint(200) for i in range(4)]
        values = [numpy.asarray(rng.uniform(size=(l,)), theano.config.floatX)
                  for l in lens]
        outs_1 = f(1, *values)
        assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
        assert numpy.all(outs_1[0] == 1.)
        assert numpy.all(outs_1[1] == 1.)
        assert numpy.all(outs_1[2] == 0.)
        assert numpy.all(outs_1[3] == 0.)

        outs_0 = f(0, *values)
        assert all([x.shape[0] == y for x, y in zip(outs_0, lens)])
        assert numpy.all(outs_0[0] == 0.)
        assert numpy.all(outs_0[1] == 0.)
        assert numpy.all(outs_0[2] == 1.)
        assert numpy.all(outs_0[3] == 1.)
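
The test relies on the gradient of the lazy ifelse op flowing only through the branch selected by the condition, which is why the selected inputs receive gradients of ones and the others zeros. A minimal sketch of that behaviour, assuming a standard Theano install (the names below are illustrative only):

import numpy
import theano
import theano.tensor as tensor
from theano.ifelse import ifelse

c = tensor.iscalar('c')
x = tensor.vector('x')
y = tensor.vector('y')
z = ifelse(c, x, y)                    # lazy: only the selected branch is run
gx, gy = tensor.grad(z.sum(), [x, y])
f = theano.function([c, x, y], [gx, gy])

v = numpy.ones(3, dtype=theano.config.floatX)
print(f(1, v, 2 * v))                  # [array([1., 1., 1.]), array([0., 0., 0.])]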
Example #2
    def __getitem__(self, args):
        if not isinstance(args, tuple):
            args = args,
        # Determine if advanced indexing is needed or not
        # The logic is already in Subtensor.convert: if it succeeds,
        # standard indexing is used; if it fails with
        # AdvancedIndexingError, advanced indexing is used.
        advanced = False
        axis = None
        for i, arg in enumerate(args):
            try:
                if arg != numpy.newaxis:
                    theano.tensor.subtensor.Subtensor.convert(arg)
            except theano.tensor.subtensor.AdvancedIndexingError:
                if advanced:
                    axis = None
                    break
                else:
                    advanced = True
                    axis = i

        if advanced:
            if (axis is not None
                and all(a == slice(None) for a in args[:axis])
                and all(a == slice(None) for a in args[axis + 1:])
                and isinstance(args[axis], (
                        numpy.ndarray,
                        list,
                        TensorVariable,
                        TensorConstant,
                        theano.tensor.sharedvar.TensorSharedVariable))):
                return self.take(args[axis], axis)
            else:
                return theano.tensor.subtensor.AdvancedSubtensor()(self, *args)
        else:
            if numpy.newaxis in args:
                # None (aka np.newaxis) in numpy indexing means to add a
                # broadcastable dimension, which theano traditionally did with
                # the dimshuffle op.  The following code converts numpy-style
                # indexing on self to traditional [read: implemented] theano
                # indexing on a dimshuffled view of self.

                counter = 0
                pattern = []
                new_args = []
                for arg in args:
                    if arg == numpy.newaxis:
                        pattern.append('x')
                        new_args.append(slice(None, None, None))
                    else:
                        pattern.append(counter)
                        counter += 1
                        new_args.append(arg)
                view = self.dimshuffle(pattern)
                rval = view.__getitem__(tuple(new_args))
                return rval
            else:
                return theano.tensor.subtensor.Subtensor(args)(
                    self, *theano.tensor.subtensor.Subtensor.collapse(args,
                    lambda entry: isinstance(entry, Variable)))
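
The numpy.newaxis branch above only inserts broadcastable dimensions through dimshuffle and then applies ordinary slicing to the reshuffled view. A small sketch of the equivalence (the exact graph produced may differ between Theano versions):

import numpy
import theano.tensor as tensor

x = tensor.matrix('x')
a = x[:, numpy.newaxis, :]       # goes through the newaxis branch above
b = x.dimshuffle(0, 'x', 1)      # the pattern it is rewritten to
print(a.broadcastable)           # (False, True, False)
print(b.broadcastable)           # (False, True, False)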
Example #3
def local_gpuaalloc2(node):
    """
    Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if (isinstance(node.op, tensor.Alloc)
            and all(c != 'output' and c.op == tensor.join and all(
                i.owner and i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:]) for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(*node.inputs))]
Example #4
File: opt.py Project: gyenney/Tools
def local_gpuaalloc2(node):
    """
    Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if (isinstance(node.op, tensor.Alloc) and
        all(c != 'output' and
            c.op == tensor.join and
            all(i.owner and
                i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:])
            for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(*node.inputs))]
Example #5
        def local_opt(node):
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) in OP:

                # Either one of our inputs is on the gpu or
                # all of our clients are on the gpu
                if (any([
                        i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs
                ]) or all([
                        c != 'output' and c.op == gpu_from_host
                        for c, idx in node.outputs[0].clients
                ])):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherits from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [
                                safe_to_cpu(o)
                                for o in new_op(*node.inputs, return_list=True)
                            ]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
Example #6
 def is_outputs(elem):
     if (isinstance(elem, (list, tuple)) and
         all([isinstance(x, theano.Variable) for x in elem])):
         return True
     if isinstance(elem, theano.Variable):
         return True
     return False
Example #7
def test_give_variables_names_small():
    x = theano.tensor.matrix("x")
    y = theano.tensor.dot(x, x)
    fgraph = theano.FunctionGraph((x,), (y,))
    give_variables_names(fgraph.variables)
    assert all(var.name for var in fgraph.variables)
    assert unique([var.name for var in fgraph.variables])
Example #8
def test_zeros_basic():
    for shp in [(3, 4, 5), (300,), (), (0, 7)]:
        _a = cuda_ndarray.CudaNdarray.zeros(shp)
        _n = numpy.zeros(shp, dtype="float32")
        assert numpy.allclose(numpy.asarray(_a), _n)
        assert _a.shape == _n.shape
        assert all(_a._strides == numpy.asarray(_n.strides) / 4)

    # TODO:The following don't have the same stride!
    #      This should be fixed with the new GpuNdArray.
    for shp in [(3, 0), (4, 1, 5)]:
        _a = cuda_ndarray.CudaNdarray.zeros(shp)
        _n = numpy.zeros(shp, dtype="float32")
        assert numpy.allclose(numpy.asarray(_a), _n)
        assert _a.shape == _n.shape

    try:
        _n = numpy.zeros()
    except TypeError:
        pass
    else:
        raise Exception("An error was expected!")
    try:
        _a = cuda_ndarray.CudaNdarray.zeros()
    except TypeError:
        pass
    else:
        raise Exception("An error was expected!")
Example #9
def test_zeros_basic():
    for shp in [(3, 4, 5), (300, ), (), (0, 7)]:
        _a = cuda_ndarray.CudaNdarray.zeros(shp)
        _n = numpy.zeros(shp, dtype="float32")
        assert numpy.allclose(numpy.asarray(_a), _n)
        assert _a.shape == _n.shape
        assert all(_a._strides == numpy.asarray(_n.strides) / 4)

    # TODO:The following don't have the same stride!
    #      This should be fixed with the new GpuNdArray.
    for shp in [(3, 0), (4, 1, 5)]:
        _a = cuda_ndarray.CudaNdarray.zeros(shp)
        _n = numpy.zeros(shp, dtype="float32")
        assert numpy.allclose(numpy.asarray(_a), _n)
        assert _a.shape == _n.shape

    try:
        _n = numpy.zeros()
    except TypeError:
        pass
    else:
        raise Exception("An error was expected!")
    try:
        _a = cuda_ndarray.CudaNdarray.zeros()
    except TypeError:
        pass
    else:
        raise Exception("An error was expected!")
Example #10
 def is_outputs(elem):
     if (isinstance(elem, (list, tuple))
             and all([isinstance(x, theano.Variable) for x in elem])):
         return True
     if isinstance(elem, theano.Variable):
         return True
     return False
Example #11
def test_give_variables_names_small():
    x = theano.tensor.matrix('x')
    y = theano.tensor.dot(x, x)
    fgraph = theano.FunctionGraph((x, ), (y, ))
    give_variables_names(fgraph.variables)
    assert all(var.name for var in fgraph.variables)
    assert unique([var.name for var in fgraph.variables])
Example #12
    def normal(self, size=None, avg=0.0, std=1.0, ndim=None,
            dtype=config.floatX):
        """
        Return symbolic tensor of normally-distributed numbers.

        :param size: Can be a list of integers or a Theano variable
            (e.g. the shape of another Theano variable).
        """
        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
            assert all([isinstance(i, int) or isinstance(i, Variable)
                for i in size]), msg
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
        generator = theano.shared(False)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                self.next_seed())
        self.state_updates.append(u.update)
        rval = u * std + avg
        if u.type.broadcastable != rval.type.broadcastable:
            raise NotImplementedError(
                'Increase the size to match the broadcasting pattern of the '
                '`avg` and `std` arguments'
            )
        return rval
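
For illustration only: the same calling convention (size/avg/std) exists on the CPU-friendly MRG generator, which avoids the CURAND GPU dependency used above. This is a sketch with made-up sizes, not the CURAND path itself:

import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=1234)
n = srng.normal(size=(3, 4), avg=0.0, std=1.0)   # symbolic normal samples
f = theano.function([], n)
print(f().shape)                                 # (3, 4)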
Example #13
def test_give_variables_names():
    x = theano.tensor.matrix('x')
    y = x + 1
    z = theano.tensor.dot(x, y)
    variables = (x, y, z)
    give_variables_names(variables)
    assert all(var.name for var in variables)
    assert unique([var.name for var in variables])
Example #14
 def is_updates(elem):
     if isinstance(elem, dict):
         return True
     # Dictionaries can be given as lists of tuples
     if (isinstance(elem, (list, tuple)) and all(
         [isinstance(x, (list, tuple)) and len(x) == 2 for x in elem])):
         return True
     return False
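
What is_updates accepts mirrors the updates argument of theano.function, which can be a dict or a list of (shared_variable, new_expression) pairs. A short usage sketch (the counter is hypothetical):

import numpy
import theano
import theano.tensor as tensor

counter = theano.shared(numpy.asarray(0, dtype='int64'), name='counter')
step = tensor.lscalar('step')
f = theano.function([step], counter,
                    updates=[(counter, counter + step)])
f(2)
f(3)
print(counter.get_value())   # 5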
Example #15
def test_give_variables_names():
    x = theano.tensor.matrix("x")
    y = x + 1
    z = theano.tensor.dot(x, y)
    variables = (x, y, z)
    give_variables_names(variables)
    assert all(var.name for var in variables)
    assert unique([var.name for var in variables])
Example #16
def test_mpi_tag_ordering():
    x = recv((2, 2), 'float32', 1, 12)
    y = recv((2, 2), 'float32', 1, 11)
    z = recv((2, 2), 'float32', 1, 13)
    f = theano.function([], [x, y, z], mode=mpi_mode)
    nodes = f.maker.linker.make_all()[-1]

    assert all(node.op.tag == tag
               for node, tag in zip(nodes, (11, 12, 13, 11, 12, 13)))
Example #17
 def is_updates(elem):
     if isinstance(elem, dict):
         return True
     # Dictionaries can be given as lists of tuples
     if (isinstance(elem, (list, tuple)) and
         all([isinstance(x, (list, tuple)) and len(x) == 2
              for x in elem])):
         return True
     return False
Example #18
 def __setup_node__(self, node):
     # sets up node so it belongs to this fgraph
     if hasattr(node, 'fgraph') and node.fgraph is not self:
         raise Exception("%s is already owned by another fgraph" % node)
     if (hasattr(node.op, 'view_map') and
         not all([isinstance(view, (list, tuple))
                  for view in node.op.view_map.values()])):
         raise Exception("Op '%s' have a bad view map '%s',"
                         " the values must be tuples or lists." % (
                             str(node.op), str(node.op.view_map)))
     if (hasattr(node.op, 'destroy_map') and
         not all([isinstance(destroy, (list, tuple))
                  for destroy in node.op.destroy_map.values()])):
         raise Exception("Op '%s' have a bad destroy map '%s',"
                         " the values must be tuples or lists." % (
                             str(node.op), str(node.op.destroy_map)))
     node.fgraph = self
     node.deps = {}
Example #19
def test_mpi_tag_ordering():
    x = recv((2, 2), 'float32', 1, 12)
    y = recv((2, 2), 'float32', 1, 11)
    z = recv((2, 2), 'float32', 1, 13)
    f = theano.function([], [x, y, z], mode=mpi_mode)
    nodes = f.maker.linker.make_all()[-1]

    assert all(node.op.tag == tag
            for node, tag in zip(nodes, (11, 12, 13, 11, 12, 13)))
Example #20
 def __setup_node__(self, node):
     # sets up node so it belongs to this fgraph
     if hasattr(node, 'fgraph') and node.fgraph is not self:
         raise Exception("%s is already owned by another fgraph" % node)
     if (hasattr(node.op, 'view_map') and not all([
             isinstance(view, (list, tuple))
             for view in node.op.view_map.values()
     ])):
         raise Exception("Op '%s' have a bad view map '%s',"
                         " the values must be tuples or lists." %
                         (str(node.op), str(node.op.view_map)))
     if (hasattr(node.op, 'destroy_map') and not all([
             isinstance(destroy, (list, tuple))
             for destroy in node.op.destroy_map.values()
     ])):
         raise Exception("Op '%s' have a bad destroy map '%s',"
                         " the values must be tuples or lists." %
                         (str(node.op), str(node.op.destroy_map)))
     node.fgraph = self
     node.deps = {}
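
The check above enforces that every view_map/destroy_map value is a list (or tuple) of input indices, one entry per output index. A bare-bones sketch of such a declaration on a hypothetical Op (it does nothing useful; the classic theano.Op API is assumed):

import theano
import theano.tensor as tensor


class ViewFirstInput(theano.Op):
    """Toy op whose single output is declared as a view of input 0."""
    __props__ = ()
    view_map = {0: [0]}            # output index -> list of input indices

    def make_node(self, x):
        x = tensor.as_tensor_variable(x)
        return theano.gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        output_storage[0][0] = inputs[0]


v = tensor.vector('v')
y = ViewFirstInput()(v)            # symbolic use; y aliases v's storage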
Example #21
    def with_linker(self, linker):
        for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)),
                                  ((1, 2, 3), (1, 2), (2, 3)),
                                  ((1, 2, 1, 3), (1, 3), (2, 3)),
                                  ((2, 3, 4), (2, 1, 0), (4, 3, 2)),
                                  ((2, 3, 4), ('x', 2, 1, 0, 'x'),
                                   (1, 4, 3, 2, 1)),
                                  ((1, 4, 3, 2, 1), (3, 2, 1), (2, 3, 4)),
                                  ((1, 1, 4), (1, 2), (1, 4)),
                                  ((1, 1, 1), (), ()),
                                  ((1,), ('x', 'x'), (1, 1))]:
            ib = [(entry == 1) for entry in xsh]
            x = TensorType('float64', ib)('x')
            e = self.op(ib, shuffle)(x)
            f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
            assert f(numpy.ones(xsh)).shape == zsh
            # test that DimShuffle.infer_shape works correctly
            x = TensorType('float64', ib)('x')
            e = self.op(ib, shuffle)(x)
            f = copy(linker).accept(FunctionGraph([x],
                                                  [e.shape])).make_function()
            assert all(f(numpy.ones(xsh))) == all(zsh)

        # Test when we drop an axis that is not broadcastable
        ib = [False, True, False]
        x = TensorType('float64', ib)('x')
        self.assertRaises(ValueError, self.op, ib, shuffle)

        # Test when we drop an axis that doesn't have shape 1
        ib = [True, True, False]
        x = TensorType('float64', ib)('x')
        e = self.op(ib, (1, 2))(x)
        f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
        self.assertRaises(TypeError, f, numpy.ones((2, 1, 4)))

        # Test that we can't take a dimension multiple times
        xsh, shuffle, zsh = ((1, 1, 4), (0, 1, 2, 0), (1, 4))
        ib = [False, True, False]
        x = TensorType('float64', ib)('x')
        self.assertRaises(ValueError, DimShuffle, ib, shuffle)
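
The failure cases at the end come down to the broadcastable pattern: dimshuffle may only drop axes whose broadcastable flag is True. A quick sketch of a legal drop (shapes are illustrative):

import numpy
import theano
from theano.tensor import TensorType

x = TensorType('float64', (False, True, False))('x')
y = x.dimshuffle(0, 2)               # drops the broadcastable middle axis
f = theano.function([x], y.shape)
print(f(numpy.ones((2, 1, 4))))      # [2 4]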
Example #22
def test_mpi_schedule():
    x = theano.tensor.matrix('x')
    y = send(x, 1, 11)
    z = x + x
    waitnode = y.owner
    sendnode = y.owner.inputs[0].owner
    addnode = z.owner

    f = theano.function([x], [y, z], mode=mpi_mode)
    nodes = f.maker.linker.make_all()[-1]
    optypes = [MPISend, theano.tensor.Elemwise, MPISendWait]
    assert all(isinstance(node.op, optype)
            for node, optype in zip(nodes, optypes))
Example #23
def test_mpi_schedule():
    x = theano.tensor.matrix('x')
    y = send(x, 1, 11)
    z = x + x
    waitnode = y.owner
    sendnode = y.owner.inputs[0].owner
    addnode = z.owner

    f = theano.function([x], [y, z], mode=mpi_mode)
    nodes = f.maker.linker.make_all()[-1]
    optypes = [MPISend, theano.tensor.Elemwise, MPISendWait]
    assert all(
        isinstance(node.op, optype) for node, optype in zip(nodes, optypes))
Example #24
        def test_specify_shape_inplace(self):
            # test that specify_shape doesn't break inserting an inplace op

            dtype = self.dtype
            if dtype is None:
                dtype = theano.config.floatX

            rng = numpy.random.RandomState(utt.fetch_seed())
            a = numpy.asarray(rng.uniform(1,2,[40,40]),dtype=dtype)
            a = self.cast_value(a)
            a_shared = self.shared_constructor(a)
            b = numpy.asarray(rng.uniform(1,2,[40,40]),dtype=dtype)
            b = self.cast_value(b)
            b_shared = self.shared_constructor(b)
            s = numpy.zeros((40,40),dtype=dtype)
            s = self.cast_value(s)
            s_shared = self.shared_constructor(s)
            f = theano.function([],
                                updates=[(s_shared, theano.dot(a_shared,b_shared)
                                         +s_shared)])
            topo=f.maker.fgraph.toposort()
            f()
            #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
            if theano.config.mode!='FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm","GpuGemm","StructuredDot"] for node in topo])==1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op,tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # There is no inplace gemm for sparse
            #assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
            s_shared_specify = tensor.specify_shape(s_shared, s_shared.get_value(borrow=True).shape)

            #now test with the specify shape op in the output
            f = theano.function([], s_shared.shape,
                                updates=[(s_shared, theano.dot(a_shared,b_shared)
                                         +s_shared_specify)])
            topo=f.maker.fgraph.toposort()
            shp=f()
            assert numpy.all(shp == (40,40))
            if theano.config.mode!='FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm","GpuGemm","StructuredDot"] for node in topo])==1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op,tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            #now test with the specify shape op in the inputs and outputs
            a_shared = tensor.specify_shape(a_shared,
                    a_shared.get_value(borrow=True).shape)
            b_shared = tensor.specify_shape(b_shared,
                    b_shared.get_value(borrow=True).shape)

            f = theano.function([], s_shared.shape,
                                updates=[(s_shared, theano.dot(a_shared,b_shared)
                                         +s_shared_specify)])
            topo=f.maker.fgraph.toposort()
            shp=f()
            assert numpy.all(shp == (40,40))
            if theano.config.mode!='FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm","GpuGemm","StructuredDot"] for node in topo])==1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op,tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
Example #25
        def test_specify_shape_inplace(self):
            # test that specify_shape doesn't break inserting an inplace op

            dtype = self.dtype
            if dtype is None:
                dtype = theano.config.floatX

            rng = numpy.random.RandomState(utt.fetch_seed())
            a = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
            a = self.cast_value(a)
            a_shared = self.shared_constructor(a)
            b = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
            b = self.cast_value(b)
            b_shared = self.shared_constructor(b)
            s = numpy.zeros((40, 40), dtype=dtype)
            s = self.cast_value(s)
            s_shared = self.shared_constructor(s)
            f = theano.function([],
                                updates=[(s_shared, theano.dot(a_shared, b_shared)
                                         + s_shared)])
            topo = f.maker.fgraph.toposort()
            f()
            #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
            if theano.config.mode != 'FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # There is no inplace gemm for sparse
            #assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
            s_shared_specify = tensor.specify_shape(s_shared, s_shared.get_value(borrow=True).shape)

            # now test with the specify shape op in the output
            f = theano.function([], s_shared.shape,
                                updates=[(s_shared, theano.dot(a_shared, b_shared)
                                         + s_shared_specify)])
            topo = f.maker.fgraph.toposort()
            shp = f()
            assert numpy.all(shp == (40, 40))
            if theano.config.mode != 'FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # now test with the specify shape op in the inputs and outputs
            a_shared = tensor.specify_shape(a_shared,
                    a_shared.get_value(borrow=True).shape)
            b_shared = tensor.specify_shape(b_shared,
                    b_shared.get_value(borrow=True).shape)

            f = theano.function([], s_shared.shape,
                                updates=[(s_shared, theano.dot(a_shared, b_shared)
                                         + s_shared_specify)])
            topo = f.maker.fgraph.toposort()
            shp = f()
            assert numpy.all(shp == (40, 40))
            if theano.config.mode != 'FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
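
tensor.specify_shape only attaches a shape assertion; the returned variable carries the same values, which is why wrapping the shared variables does not disturb the inplace gemm rewrite. A minimal sketch:

import numpy
import theano
import theano.tensor as tensor

s = theano.shared(numpy.zeros((4, 4), dtype=theano.config.floatX))
s_fixed = tensor.specify_shape(s, s.get_value(borrow=True).shape)
f = theano.function([], [s_fixed.shape, s_fixed.sum()])
print(f())   # e.g. [array([4, 4]), array(0.0)]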
Example #26
 def is_updates(elem):
     if isinstance(elem, dict):
         # Make sure the updates will be applied in a deterministic order
         if (not isinstance(elem, compat.python2x.OrderedDict)
                 and len(elem) > 1):
             warnings.warn("Expected OrderedDict or OrderedUpdates, got "\
                     + str(type(elem)) + ". This can make your script non-"
                     "deterministic.")
         return True
     # Dictionaries can be given as lists of tuples
     if (isinstance(elem, (list, tuple)) and all(
         [isinstance(x, (list, tuple)) and len(x) == 2 for x in elem])):
         return True
     return False
Example #27
 def is_updates(elem):
     if isinstance(elem, dict):
         # Make sure the updates will be applied in a deterministic order
         if (not isinstance(elem, compat.python2x.OrderedDict) and
             len(elem) > 1):
             warnings.warn("Expected OrderedDict or OrderedUpdates, got "\
                     + str(type(elem)) + ". This can make your script non-"
                     "deterministic.")
         return True
     # Dictionaries can be given as lists of tuples
     if (isinstance(elem, (list, tuple)) and
         all([isinstance(x, (list, tuple)) and len(x) == 2
              for x in elem])):
         return True
     return False
Example #28
    def filter(x):
        """
        Ensure `x` is made only of allowed data types.

        Return True iff `x` is made only of lists, tuples, dictionaries, Theano
        variables or `theano.scan_module.until` objects.
        """
        # Is `x` a container we can iterate on?
        iter_on = None
        if isinstance(x, list) or isinstance(x, tuple):
            iter_on = x
        elif isinstance(x, dict):
            iter_on = x.iteritems()
        if iter_on is not None:
            return all(filter(y) for y in iter_on)
        else:
            return (isinstance(x, theano.Variable) or
                    isinstance(x, theano.scan_module.until))
Example #29
    def filter(x):
        """
        Ensure `x` is made only of allowed data types.

        Return True iff `x` is made only of lists, tuples, dictionaries, Theano
        variables or `theano.scan_module.until` objects.
        """
        # Is `x` a container we can iterate on?
        iter_on = None
        if isinstance(x, list) or isinstance(x, tuple):
            iter_on = x
        elif isinstance(x, dict):
            iter_on = x.iteritems()
        if iter_on is not None:
            return all(filter(y) for y in iter_on)
        else:
            return (isinstance(x, theano.Variable)
                    or isinstance(x, theano.scan_module.until))
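
For readability, the same recursive check can be written as a standalone helper; this sketch mirrors the logic of filter above (renamed to avoid shadowing the builtin, and using Python 3 items()). Note that, as in the original, dict entries are checked as (key, value) tuples, so only Variable keys and values pass:

import theano
import theano.tensor as tensor

def is_allowed(x):
    # Lists, tuples and dict items are accepted if everything inside them is.
    if isinstance(x, (list, tuple)):
        return all(is_allowed(y) for y in x)
    if isinstance(x, dict):
        return all(is_allowed(y) for y in x.items())
    return isinstance(x, (theano.Variable, theano.scan_module.until))

x = tensor.scalar('x')
print(is_allowed([x, (x + 1, x * 2)]))   # True
print(is_allowed([x, 3.14]))             # False: a plain float is rejected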
Example #30
File: opt.py Project: gyenney/Tools
        def local_opt(node):
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) in OP:

                # Either one of our inputs is on the gpu or
                # all of our clients are on the gpu
                if (any([i.owner and i.owner.op == host_from_gpu
                         for i in node.inputs]) or
                    all([c != 'output' and c.op == gpu_from_host
                         for c, idx in node.outputs[0].clients])):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherits from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [safe_to_cpu(o) for o in
                                    new_op(*node.inputs, return_list=True)]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
Example #31
 def uniform(self, size, low=0.0, high=1.0, ndim=None,
         dtype=config.floatX):
     """
     Return symbolic tensor of uniform numbers.
     """
     if isinstance(size, tuple):
         msg = "size must be a tuple of int or a Theano variable"
         assert all([isinstance(i, int) or isinstance(i, Variable)
             for i in size]), msg
     else:
         msg = "size must be a tuple of int or a Theano variable"
         assert isinstance(size, Variable) and size.ndim == 1, msg
     generator = theano.shared(False)  # makes a generic
     s_size = theano.tensor.as_tensor_variable(size)
     u = CURAND_Uniform.new_auto_update(generator, ndim, dtype, s_size,
             self.next_seed())
     self.state_updates.append(u.update)
     rval = u * (high - low) + low
     if u.type.broadcastable != rval.type.broadcastable:
         raise NotImplementedError(
             'Increase the size to match the broadcasting pattern of '
             '`low` and `high` arguments'
         )
     return rval
Example #32
    def with_linker(self, linker, scalar_op=scalar.add, dtype="floatX",
                    pre_scalar_op=None,
                    test_nan=False, tensor_op=None):
        for xsh, tosum in self.cases:
            if dtype == "floatX":
                dtype = theano.config.floatX
            x = TensorType(dtype, [(entry == 1) for entry in xsh])('x')
            d = {}
            if pre_scalar_op is not None:
                d = {"pre_scalar_op": pre_scalar_op}
            if tensor_op is None:
                e = as_tensor_variable(self.op(scalar_op, axis=tosum, **d)(x))
            else:
                e = as_tensor_variable(tensor_op(x, axis=tosum, **d))

            if tosum is None:
                tosum = range(len(xsh))

            f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
            xv = numpy.asarray(numpy.random.rand(*xsh))

            if not "int" in dtype:
                xv = numpy.asarray(xv, dtype=dtype)
            else:
                xv = numpy.asarray(xv < 0.5, dtype=dtype)

            if test_nan and xv.size > 0:
                if len(xsh) > 0:
                    xv = xv.flatten()
                    xv[0] = numpy.nan
                    xv = xv.reshape(*xsh)
                else:
                    xv = numpy.asarray(numpy.nan, dtype=dtype)
            zv = xv
            if pre_scalar_op is not None:
                zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv})
            numpy_raised = False
            if len(tosum) > 1 and any([a < 0 for a in tosum]):
                # In that case, we need to use the right order of axes
                # in the reduction.
                axis2 = []
                for a in tosum:
                    if a < 0:
                        axis2.append(a + len(xsh))
                    else:
                        axis2.append(a)
                assert len(axis2) == len(tosum)
                tosum = tuple(axis2)
            if tensor_op == tensor.all:
                for axis in reversed(sorted(tosum)):
                    zv = numpy.all(zv, axis)
                if len(tosum) == 0:
                    zv = zv != 0
            elif tensor_op == tensor.any:
                for axis in reversed(sorted(tosum)):
                    zv = numpy.any(zv, axis)
                if len(tosum) == 0:
                    zv = zv != 0
            elif scalar_op == scalar.add:
                for axis in reversed(sorted(tosum)):
                    zv = numpy.add.reduce(zv, axis)
            elif scalar_op == scalar.mul:
                for axis in reversed(sorted(tosum)):
                    zv = numpy.multiply.reduce(zv, axis)
            elif scalar_op == scalar.maximum:
                try:
                    for axis in reversed(sorted(tosum)):
                        zv = numpy.maximum.reduce(zv, axis)
                except ValueError:
                    numpy_raised = True
            elif scalar_op == scalar.minimum:
                try:
                    for axis in reversed(sorted(tosum)):
                        zv = numpy.minimum.reduce(zv, axis)
                except ValueError:
                    numpy_raised = True
            elif scalar_op == scalar.or_:
                for axis in reversed(sorted(tosum)):
                    zv = numpy.bitwise_or.reduce(zv, axis)
            elif scalar_op == scalar.and_:
                for axis in reversed(sorted(tosum)):
                    zv = numpy.bitwise_and.reduce(zv, axis)
            elif scalar_op == scalar.xor:
                # There is no identity value for the xor function
                # So we can't support shape of dimensions 0.
                if numpy.prod(zv.shape) == 0:
                    continue
                for axis in reversed(sorted(tosum)):
                    zv = numpy.bitwise_xor.reduce(zv, axis)
            else:
                raise Exception(
                    "Test for CAReduce with scalar_op %s not implemented" %
                    str(scalar_op))
            if scalar_op in [scalar.maximum, scalar.minimum] and numpy_raised:
                try:
                    out = f(xv)
                    assert out.dtype == dtype
                except ValueError:
                    pass
                else:
                    self.fail()
            else:
                # numpy.{all,any} return bool type,
                # but theano ops return an int8 array instead
                if scalar_op in [scalar.and_, scalar.or_]:
                    zv = numpy.asarray(zv, dtype='int8')
                if test_nan:
                    try:
                        self.assertTrue(
                            theano.tensor.TensorType.values_eq(f(xv), zv),
                            (f(xv), zv))
                    except NotImplementedError:
                        # GpuCAReduce doesn't implement all cases when size is 0
                        assert xv.size == 0
                else:
                    try:
                        f_xv = f(xv)
                        self.assertTrue((f_xv.shape == zv.shape), (f_xv, zv))
                        self.assertTrue(numpy.allclose(f_xv, zv),
                                        (f_xv, zv, xsh, tosum))
                    except NotImplementedError:
                        # GpuCAReduce doesn't implement all cases when size is 0
                        assert xv.size == 0

            x = TensorType(dtype, [(entry == 1) for entry in xsh])('x')
            if tensor_op is None:
                e = self.op(scalar_op, axis=tosum)(x)
            else:
                e = tensor_op(x, axis=tosum)
            if tosum is None:
                tosum = range(len(xsh))
            f = copy(linker).accept(FunctionGraph([x],
                                                  [e.shape])).make_function()
            if not(scalar_op in [scalar.maximum, scalar.minimum] and
                   ((xsh == () or numpy.prod(xsh) == 0))):
                try:
                    assert all(f(xv) == zv.shape)
                except NotImplementedError:
                    # GpuCAReduce doesn't implement all cases when size is 0
                    assert xv.size == 0
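
The long dispatch above reproduces each Theano reduction with the matching numpy.<ufunc>.reduce, folding one axis at a time from the highest axis down. A compact cross-check of that idea for the plain add case:

import numpy
import theano
import theano.tensor as tensor

x = tensor.tensor3('x')
f = theano.function([x], x.sum(axis=(0, 2)))

xv = numpy.random.rand(2, 3, 4).astype(theano.config.floatX)
zv = xv
for axis in reversed(sorted((0, 2))):    # same fold order as the test
    zv = numpy.add.reduce(zv, axis)
assert numpy.allclose(f(xv), zv)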
Example #33
                for oy in N.arange(lbound[0], ubound[0], dy):
                    # loop over output image width
                    for ox in N.arange(lbound[1], ubound[1], dx):

                        # kern[l] is filter value to apply at (oj,oi)
                        # for (iy,ix)
                        l = 0

                        # ... ITERATE OVER INPUT UNITS IN RECEPTIVE FIELD
                        for ky in oy + N.arange(kshp[0]):
                            for kx in ox + N.arange(kshp[1]):

                                # verify if we are still within image
                                # boundaries. Equivalent to
                                # zero-padding of the input image
                                if (all((ky, kx) >= topleft)
                                        and all((ky, kx) < botright)):

                                    # convert to "valid" input space
                                    # coords used to determine column
                                    # index to write to in sparse mat
                                    iy, ix = N.array((ky, kx)) - topleft
                                    # determine raster-index of input pixel...

                                    # taking into account multiple
                                    # input features
                                    col = iy * inshp[2] + ix + \
                                          fmapi * N.prod(inshp[1:])

                                    # convert oy,ox values to output
                                    # space coordinates
Example #34
def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, dependencies):
    reallocated_info = {}
    viewed_by = {}
    for var in fgraph.variables:
        viewed_by[var] = []
    view_of = {}
    pre_allocated = set([])
    allocated = set([])

    for idx in range(len(order)):
        node = order[idx]
        dmap = getattr(node.op, 'destroy_map', None)
        vmap = getattr(node.op, 'view_map', None)

        idx_o = 0
        for out in node.outputs:
            for var in node.outputs:
                compute_map_re[var][0] = 1
            ins = None
            if dmap and idx_o in dmap:
                idx_v = dmap[idx_o]
                assert len(
                    idx_v) == 1, "Here we only support the possibility to destroy one input"
                ins = node.inputs[idx_v[0]]
            if vmap and idx_o in vmap:
                assert ins is None
                idx_v = vmap[idx_o]
                assert len(
                    idx_v) == 1, "Here we only support the possibility to view one input"
                ins = node.inputs[idx_v[0]]
            if ins is not None:
                assert isinstance(ins, theano.Variable)
                origin = view_of.get(ins, ins)
                view_of[out] = origin
                viewed_by[origin].append(out)
            idx_o += 1

        for ins in node.inputs:
            assert not (ins in view_of and viewed_by[ins])
            if (getattr(ins, 'ndim', None) == 0 and not storage_map[ins][0]
                    and ins not in fgraph.outputs and ins.owner
                    and all([compute_map_re[v][0] for v in dependencies.get(ins, [])])
                    and ins not in allocated):
                # Constant Memory cannot be changed
                # Constant and shared variables' storage_map value is not empty
                reuse_out = None
                if ins not in view_of and not viewed_by.get(ins, []):
                    # where gc
                    for i in range(idx + 1, len(order)):
                        if reuse_out:
                            break
                        for out in order[i].outputs:
                            if (getattr(out, 'ndim', None) == 0 and out not in pre_allocated
                                    and ins.type == out.type):
                                reuse_out = out
                                pre_allocated.add(out)
                                allocated.add(ins)
                elif ins in view_of:
                    origin = view_of[ins]
                    if ins in viewed_by[origin]:
                        viewed_by[origin].remove(ins)
                    if (not viewed_by[origin] and
                            origin not in fgraph.inputs and
                            not isinstance(origin, theano.Constant)):
                        # where gc
                        for i in range(idx + 1, len(order)):
                            if reuse_out:
                                break
                            for out in order[i].outputs:
                                if (getattr(out, 'ndim', None) == 0 and out not in pre_allocated
                                        and ins.type == out.type):
                                    reuse_out = out
                                    pre_allocated.add(out)
                                    allocated.add(ins)

                if reuse_out:
                    reallocated_info[ins] = [ins, reuse_out]

    return reallocated_info
Example #35
                for oy in N.arange(lbound[0], ubound[0], dy):
                    # loop over output image width
                    for ox in N.arange(lbound[1], ubound[1], dx):

                        # kern[l] is filter value to apply at (oj,oi)
                        # for (iy,ix)
                        l = 0

                        # ... ITERATE OVER INPUT UNITS IN RECEPTIVE FIELD
                        for ky in oy + N.arange(kshp[0]):
                            for kx in ox + N.arange(kshp[1]):

                                # verify if we are still within image
                                # boundaries. Equivalent to
                                # zero-padding of the input image
                                if all((ky, kx) >= topleft) and all((ky, kx) < botright):

                                    # convert to "valid" input space
                                    # coords used to determine column
                                    # index to write to in sparse mat
                                    iy, ix = N.array((ky, kx)) - topleft
                                    # determine raster-index of input pixel...

                                    # taking into account multiple
                                    # input features
                                    col = iy * inshp[2] + ix + fmapi * N.prod(inshp[1:])

                                    # convert oy,ox values to output
                                    # space coordinates
                                    if mode == "full":
                                        (y, x) = (oy, ox)
Example #36
File: vm.py Project: pawef/Theano
def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re,
                              dependencies):
    reallocated_info = {}
    viewed_by = {}
    for var in fgraph.variables:
        viewed_by[var] = []
    view_of = {}
    pre_allocated = set([])
    allocated = set([])

    for idx in range(len(order)):
        node = order[idx]
        dmap = getattr(node.op, 'destroy_map', None)
        vmap = getattr(node.op, 'view_map', None)

        idx_o = 0
        for out in node.outputs:
            for var in node.outputs:
                compute_map_re[var][0] = 1
            ins = None
            if dmap and idx_o in dmap:
                idx_v = dmap[idx_o]
                assert len(
                    idx_v
                ) == 1, "Here we only support the possibility to destroy one input"
                ins = node.inputs[idx_v[0]]
            if vmap and idx_o in vmap:
                assert ins is None
                idx_v = vmap[idx_o]
                assert len(
                    idx_v
                ) == 1, "Here we only support the possibility to view one input"
                ins = node.inputs[idx_v[0]]
            if ins is not None:
                assert isinstance(ins, theano.Variable)
                origin = view_of.get(ins, ins)
                view_of[out] = origin
                viewed_by[origin].append(out)
            idx_o += 1

        for ins in node.inputs:
            assert not (ins in view_of and viewed_by[ins])
            if (getattr(ins, 'ndim', None) == 0 and not storage_map[ins][0]
                    and ins not in fgraph.outputs and ins.owner and all([
                        compute_map_re[v][0]
                        for v in dependencies.get(ins, [])
                    ]) and ins not in allocated):
                # Constant Memory cannot be changed
                # Constant and shared variables' storage_map value is not empty
                reuse_out = None
                if ins not in view_of and not viewed_by.get(ins, []):
                    # where gc
                    for i in range(idx + 1, len(order)):
                        if reuse_out:
                            break
                        for out in order[i].outputs:
                            if (getattr(out, 'ndim', None) == 0
                                    and out not in pre_allocated
                                    and ins.type == out.type):
                                reuse_out = out
                                pre_allocated.add(out)
                                allocated.add(ins)
                elif ins in view_of:
                    origin = view_of[ins]
                    if ins in viewed_by[origin]:
                        viewed_by[origin].remove(ins)
                    if (not viewed_by[origin] and origin not in fgraph.inputs
                            and not isinstance(origin, theano.Constant)):
                        # where gc
                        for i in range(idx + 1, len(order)):
                            if reuse_out:
                                break
                            for out in order[i].outputs:
                                if (getattr(out, 'ndim', None) == 0
                                        and out not in pre_allocated
                                        and ins.type == out.type):
                                    reuse_out = out
                                    pre_allocated.add(out)
                                    allocated.add(ins)

                if reuse_out:
                    reallocated_info[ins] = [ins, reuse_out]

    return reallocated_info
Example #37
File: vm.py Project: rsk2327/Theano
    def __call__(self):
        storage_map = self.storage_map
        compute_map = self.compute_map
        thunks = self.thunks
        dependencies = self.dependencies
        self.node_executed_order = []
        self.node_cleared_order = []

        for k in self.storage_map:
            compute_map[k][0] = (k.owner is None)

        # apply_stack contains nodes
        apply_stack = list(self.base_apply_stack)
        last_apply_stack_len = -1

        # This records all function inputs/shared variables and constants
        for var, data in self.storage_map.iteritems():
            if data[0] is None:
                continue
            if hasattr(var.type, 'get_shape_info'):
                sh = var.type.get_shape_info(data[0])
            else:
                sh = 'input no shape'
            self.variable_shape[var] = sh
            st = getattr(data[0], 'strides', 'input no strides')
            if getattr(data[0], 'flags', False) and data[0].flags.c_contiguous:
                st = 'c'
            elif (hasattr(data[0], 'is_c_contiguous') and
                  data[0].is_c_contiguous()):
                st = "c"
            self.variable_strides[var] = st

        while apply_stack:
            # Make sure something happened last time round.  This is
            # just a safety check to make sure the op is written
            # correctly apply_stack should either decrease in length
            # by one (a thunk successfully applied), or increase in
            # length (added dependencies over and above the original).
            # NB: this doesn't catch cycles (would be too expensive/slow),
            #     just stalls.
            apply_stack_len = len(apply_stack)
            assert apply_stack_len != last_apply_stack_len
            last_apply_stack_len = apply_stack_len

            current_apply = apply_stack.pop()
            current_inputs = current_apply.inputs
            current_outputs = current_apply.outputs
            current_deps = current_inputs + current_apply.destroy_dependencies

            computed_ins = all(compute_map[v][0] for v in current_deps)
            computed_outs = all(compute_map[v][0] for v in current_outputs)

            if not thunks[self.node_idx[current_apply]].lazy:
                #
                # stack loop: Normal Non-Lazy Case
                # ================================
                #
                # Check if all inputs are in place
                # If so compute thunk and remove it from the apply_stack
                # If not leave it in, and add to the apply_stack those
                # that will produce you those inputs

                if computed_ins and not computed_outs:
                    # -- Non-lazy case: have inputs, time to compute outputs
                    try:
                        _, dt = self.run_thunk_of_node(current_apply)
                        del _
                        if config.profile:
                            current_idx = self.node_idx[current_apply]
                            self.call_counts[current_idx] += 1
                            self.call_times[current_idx] += dt
                            # Computing the memory footprint of the op
                            # ?? What about inplace .. if the op is inplace
                            # you don't actually ask for more memory!
                            for (idx, o) in enumerate(
                                    thunks[self.node_idx[
                                        current_apply]].outputs):
                                var = self.nodes[current_idx].outputs[idx]
                                if hasattr(var.type, 'get_shape_info'):
                                    sh = var.type.get_shape_info(o[0])
                                else:
                                    sh = 'input no shape'
                                self.variable_shape[var] = sh
                                st = getattr(o[0], 'strides',
                                             'input no strides')
                                if (getattr(o[0], 'flags', False) and
                                        o[0].flags.c_contiguous):
                                    st = 'c'
                                elif (hasattr(o[0], 'is_c_contiguous') and
                                      o[0].is_c_contiguous()):
                                    st = "c"
                                self.variable_strides[var] = st
                    except Exception:
                        link.raise_with_op(current_apply,
                                           self.thunks[self.node_idx[current_apply]],
                                           storage_map=storage_map)
                    for o in current_apply.outputs:
                        compute_map[o][0] = 1

                    input_index = []
                    # A list store the index of inputs variables

                    if self.allow_gc:
                        for i in current_apply.inputs:
                            # Garbage Collection -> check if anybody else uses
                            # this input
                            if (dependencies[i]
                                    and i.owner
                                    and i not in self.outputs):
                                if all(compute_map[v][0]
                                        for v in dependencies[i]):
                                    storage_map[i][0] = None
                                    input_index.append(
                                        current_apply.inputs.index(i))

                                    # DO NOT set compute_map to 0

                                    # If values become False and the
                                    # current_apply is still in the
                                    # stack, this will cause it to be
                                    # recomputed! This can cause wrong
                                    # values with some combinations of
                                    # inplace ops.
                                    compute_map[i][0] = 2
                                    if (config.warn.vm_gc_bug and
                                        current_apply in apply_stack and
                                        getattr(current_apply.op,
                                                'destroy_map',
                                                False)):
                                        warnings.warn(
                                            "There was a bug that existed in the default Theano configuration,"
                                            " only in the development version between July 5th 2012"
                                            " and July 30th 2012. This was not in a released version."
                                            " The bug was affecting this script.",
                                            # The stack level is not good when
                                            # inside a Scan.
                                            stacklevel=3
                                        )
                    self.node_cleared_order.append(input_index)

                elif not computed_ins:
                    # -- Non-lazy case, need inputs
                    apply_stack.append(current_apply)
                    apply_stack.extend(inp.owner
                                       for inp in current_deps
                                       if inp.owner)

            elif not computed_outs:
                #
                # stack loop: Lazy Evaluation Case
                # ================================
                #
                # Lazy evaluation protocol is to run the thunk with the
                # current storage_map and compute_map accessed via closure,
                # and the thunk will return a list of variables from its input
                # list that it requires.

                try:
                    requires, dt = self.run_thunk_of_node(current_apply)
                    current_idx = self.node_idx[current_apply]
                    self.call_counts[current_idx] += 1
                    self.call_times[current_idx] += dt

                except Exception:
                    link.raise_with_op(current_apply,
                                       self.thunks[self.node_idx[current_apply]],
                                       storage_map)

                if requires:
                    for r in requires:
                        # We are not done with this op, so we add it
                        # back and schedule the owners of the inputs
                        # it is missing
                        apply_stack.append(current_apply)
                        if current_apply.inputs[r].owner:
                            apply_stack.append(current_apply.inputs[r].owner)
                else:
                    if config.profile:
                        for (idx, o) in enumerate(thunks[
                                self.node_idx[current_apply]].outputs):
                            var = self.nodes[
                                self.node_idx[current_apply]].outputs[idx]

                            if hasattr(var.type, 'get_shape_info'):
                                sh = var.type.get_shape_info(o[0])
                            else:
                                sh = 'input no shape'
                            self.variable_shape[var] = sh
                            st = getattr(o[0], 'strides', 'input no strides')
                            if (getattr(o[0], 'flags', False) and
                                    o[0].flags.c_contiguous):
                                st = 'c'
                            elif (hasattr(o[0], 'is_c_contiguous') and
                                  o[0].is_c_contiguous()):
                                st = "c"
                            self.variable_strides[var] = st

                    input_index = []

                    if self.allow_gc:
                        for i in current_apply.inputs:
                            if (dependencies[i] and i.owner and
                                    i not in self.outputs):
                                empty_storage_map = True
                                for x in dependencies[i]:
                                    if not compute_map[x][0]:
                                        empty_storage_map = False
                                        break
                                if empty_storage_map:
                                    storage_map[i][0] = None
                                    input_index.append(
                                        current_apply.inputs.index(i))
                                    # See the non-lazy gc code above for an
                                    # explanation of this compute_map change.
                                    compute_map[i][0] = 2

                    self.node_cleared_order.append(input_index)

        # Hacky coarse gc final pass
        # This is required until we have a proper gc algorithm for graphs with
        # lazy evaluation. See discussion on theano-dev June 19 2012.
        final_index = []

        if self.allow_gc:
            for v in storage_map:
                if v.owner and v not in self.outputs:
                    if compute_map[v][0] == 2:
                        continue
                    else:
                        storage_map[v][0] = None
                        final_index.append(v)
                        compute_map[v][0] = 2

        self.node_cleared_order.append(final_index)
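
The lazy-evaluation protocol described in the comments above can be illustrated with a minimal, self-contained sketch. This is not Theano's IfElse implementation: the Node stand-in and every name below are hypothetical, and only the closure convention (one-element storage_map/compute_map entries, thunks returning the indices of missing inputs, an empty list meaning "done") mirrors what the Stack VM expects.

from collections import namedtuple

# Hypothetical stand-in for Theano's Apply node, just to make the sketch
# runnable outside Theano.
Node = namedtuple('Node', 'inputs outputs')


def make_lazy_switch_thunk(node, storage_map, compute_map):
    # Sketch of a lazy thunk: return the indices (into node.inputs) of the
    # inputs still needed, or [] once the output has been computed.
    cond, if_true, if_false = node.inputs
    out = node.outputs[0]

    def thunk():
        if not compute_map[cond][0]:
            return [0]                    # need the condition first
        branch, idx = (if_true, 1) if storage_map[cond][0] else (if_false, 2)
        if not compute_map[branch][0]:
            return [idx]                  # need only the taken branch
        storage_map[out][0] = storage_map[branch][0]
        compute_map[out][0] = True
        return []                         # done

    thunk.lazy = True
    return thunk


# Tiny driving loop standing in for the Stack VM (all inputs are already
# computed here; a real VM would compute whatever the thunk asks for).
cond, a, b, out = 'cond', 'a', 'b', 'out'
node = Node(inputs=[cond, a, b], outputs=[out])
storage_map = {cond: [1], a: [10.0], b: [20.0], out: [None]}
compute_map = {cond: [True], a: [True], b: [True], out: [False]}
thunk = make_lazy_switch_thunk(node, storage_map, compute_map)
while thunk():
    pass
print(storage_map[out][0])                # -> 10.0
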
Example #38
0
File: vm.py Project: rsk2327/Theano
    def make_vm(self, nodes, thunks,
                input_storage, output_storage, storage_map,
                post_thunk_clear,
                computed,
                compute_map,
                updated_vars
                ):

        pre_call_clear = [storage_map[v] for v in self.no_recycling]

        if (self.callback is not None or
                (config.profile and config.profile_memory)):

            if self.use_cloop and self.callback is not None:
                logger.warning('CVM does not support callback, using Stack VM.')
            if self.use_cloop and config.profile_memory:
                warnings.warn(
                    'CVM does not support memory profile, using Stack VM.')
            # Needed when allow_gc=True and profiling
            deps = self.compute_gc_dependencies(storage_map)
            vm = Stack(
                nodes, thunks, pre_call_clear,
                storage_map, compute_map,
                self.fgraph, self.allow_gc,
                dependencies=deps,
                callback=self.callback)
        elif self.use_cloop:
            # create a map from nodes to ints and vars to ints
            nodes_idx = {}
            vars_idx = {}
            for i, node in enumerate(nodes):
                nodes_idx[node] = i
                for v in node.inputs + node.outputs:
                    vars_idx.setdefault(v, len(vars_idx))
            for v in self.fgraph.inputs + self.fgraph.outputs:
                vars_idx.setdefault(v, len(vars_idx))

            nodes_idx_inv = {}
            vars_idx_inv = {}
            for (node, i) in nodes_idx.items():
                nodes_idx_inv[i] = node
            for (var, i) in vars_idx.items():
                vars_idx_inv[i] = var

            # put storage_map and compute_map into an int-based scheme
            n_applies = len(nodes)
            storage_map_list = [storage_map[vars_idx_inv[i]]
                                for i in xrange(len(vars_idx_inv))]
            compute_map_list = [compute_map[vars_idx_inv[i]]
                                for i in xrange(len(vars_idx_inv))]
            if nodes:
                assert type(storage_map_list[0]) is list
                assert type(compute_map_list[0]) is list

            # Needed when allow_gc=True and profiling
            dependency_map = self.compute_gc_dependencies(storage_map)
            dependency_map_list = [
                [vars_idx[d] for d in dependency_map[vars_idx_inv[i]]]
                for i in xrange(len(vars_idx_inv))]

            # build the pointers to node inputs and offsets
            base_input_output_list = []
            node_n_inputs = []
            node_n_outputs = []
            node_input_offset = []
            node_output_offset = []
            for node in nodes:
                inputs_idx = [vars_idx[v] for v in node.inputs]
                outputs_idx = [vars_idx[v] for v in node.outputs]
                node_n_inputs.append(len(inputs_idx))
                node_n_outputs.append(len(outputs_idx))
                node_input_offset.append(len(base_input_output_list))
                base_input_output_list.extend(inputs_idx)
                node_output_offset.append(len(base_input_output_list))
                base_input_output_list.extend(outputs_idx)

            # build the var owner array
            var_owner = [None] * len(vars_idx)
            for (var, i) in vars_idx.items():
                if var.owner:
                    var_owner[i] = nodes_idx[var.owner]

            is_lazy_list = [int(th.lazy) for th in thunks]
            output_vars = [vars_idx[v] for v in self.fgraph.outputs]

            # builds the list of prereqs induced by e.g. destroy_handler
            ords = self.fgraph.orderings()
            node_prereqs = []
            node_output_size = []
            for i, node in enumerate(nodes):
                node_output_size.append(0)
                prereq_var_idxs = []
                for prereq_node in ords.get(node, []):
                    prereq_var_idxs.extend(
                        [vars_idx[v] for v in prereq_node.outputs])
                prereq_var_idxs = list(set(prereq_var_idxs))
                prereq_var_idxs.sort()  # TODO: why sort?
                node_prereqs.append(prereq_var_idxs)

            # Builds the list of input storage to update (according to update
            # rules) when the outputs are computed.
            # They are in the same order as the second part of output_vars
            # (output_vars contains first the returned outputs, then the
            # values of the update expressions).
            update_storage = []
            update_in_from_out = {}
            for (ivar, ovar) in updated_vars.items():
                update_in_from_out[vars_idx[ovar]] = vars_idx[ivar]
            for oidx in output_vars:
                if oidx in update_in_from_out:
                    update_storage.append(update_in_from_out[oidx])
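            # e.g. (hypothetical indices) if output_vars = [3, 7] and
            # variable 7 is the update expression of shared input 2,
            # update_storage becomes [2]: once output 7 is computed, its
            # value is written back into storage slot 2.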

            c0 = sys.getrefcount(node_n_inputs)
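            # The assertion after the CVM construction below checks that
            # building the CVM leaves this refcount unchanged, i.e. that the
            # C implementation presumably copies what it needs rather than
            # holding new Python references to these lists.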
            vm = CVM(
                nodes,
                thunks,
                pre_call_clear,
                allow_gc=self.allow_gc,
                call_counts=[0] * len(nodes),
                call_times=[0.0] * len(nodes),
                compute_map_list=compute_map_list,
                storage_map_list=storage_map_list,
                base_input_output_list=base_input_output_list,
                node_n_inputs=node_n_inputs,
                node_n_outputs=node_n_outputs,
                node_input_offset=node_input_offset,
                node_output_offset=node_output_offset,
                var_owner=var_owner,
                is_lazy_list=is_lazy_list,
                output_vars=output_vars,
                node_prereqs=node_prereqs,
                node_output_size=node_output_size,
                update_storage=update_storage,
                dependencies=dependency_map_list,
            )
            assert c0 == sys.getrefcount(node_n_inputs)
        else:
            lazy = self.lazy
            if lazy is None:
                lazy = config.vm.lazy
            if lazy is None:
                lazy = not all([(not th.lazy) for th in thunks])
            if not lazy:
                # there is no conditional in the graph
                if self.allow_gc:
                    vm = LoopGC(
                        nodes,
                        thunks,
                        pre_call_clear,
                        post_thunk_clear)
                else:
                    vm = Loop(
                        nodes,
                        thunks,
                        pre_call_clear)
            else:
                # Needed when allow_gc=True and profiling
                deps = self.compute_gc_dependencies(storage_map)
                vm = Stack(
                    nodes, thunks, pre_call_clear,
                    storage_map, compute_map,
                    self.fgraph, self.allow_gc,
                    dependencies=deps
                )
        return vm
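
The int-based scheme built above for the CVM can be seen in miniature on a hypothetical two-node graph. The dictionaries and variable names below are made up for illustration; only the flattening into one flat index list plus per-node counts and offsets mirrors how base_input_output_list and the offset lists are constructed.

# Toy version (not Theano code) of the flat input/output layout the CVM
# consumes: all input/output variable indices are packed into one list and
# each node stores only counts and offsets into it.
nodes = [
    {'inputs': ['x', 'y'], 'outputs': ['s']},   # s = x + y
    {'inputs': ['s', 'w'], 'outputs': ['z']},   # z = s * w
]

vars_idx = {}
for node in nodes:
    for v in node['inputs'] + node['outputs']:
        vars_idx.setdefault(v, len(vars_idx))   # x=0, y=1, s=2, w=3, z=4

base_input_output_list = []
node_n_inputs, node_n_outputs = [], []
node_input_offset, node_output_offset = [], []
for node in nodes:
    ins = [vars_idx[v] for v in node['inputs']]
    outs = [vars_idx[v] for v in node['outputs']]
    node_n_inputs.append(len(ins))
    node_n_outputs.append(len(outs))
    node_input_offset.append(len(base_input_output_list))
    base_input_output_list.extend(ins)
    node_output_offset.append(len(base_input_output_list))
    base_input_output_list.extend(outs)

# Recover node 1's input indices from the flat layout:
i = 1
print(base_input_output_list[
    node_input_offset[i]:node_input_offset[i] + node_n_inputs[i]])  # -> [2, 3]

With everything reduced to integer indices and flat lists, the C implementation can walk the graph with array arithmetic instead of Python dictionaries, which is presumably the point of this conversion.
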
Example #39
0
File: sp.py Project: gyenney/Tools
                for oy in N.arange(lbound[0], ubound[0], dy):
                    # loop over output image width
                    for ox in N.arange(lbound[1], ubound[1], dx):

                        # kern[l] is the filter value to apply at (oy, ox)
                        # for input pixel (iy, ix)
                        l = 0

                        # ... ITERATE OVER INPUT UNITS IN RECEPTIVE FIELD
                        for ky in oy + N.arange(kshp[0]):
                            for kx in ox + N.arange(kshp[1]):

                                # verify if we are still within image
                                # boundaries. Equivalent to
                                # zero-padding of the input image
                                if (all((ky, kx) >= topleft) and
                                    all((ky, kx) < botright)):

                                    # convert to "valid" input space
                                    # coords used to determine column
                                    # index to write to in sparse mat
                                    iy, ix = N.array((ky, kx)) - topleft
                                    # determine raster-index of input pixel...

                                    # taking into account multiple
                                    # input features
                                    col = iy * inshp[2] + ix + \
                                          fmapi * N.prod(inshp[1:])

                                    # convert oy,ox values to output
                                    # space coordinates
Example #40
0
File: vm.py Project: pawef/Theano
class VM_Linker(link.LocalLinker):
    """
    Class that satisfies the Linker interface by acting as a VM factory.
    """
    def __init__(self,
                 allow_gc=None,
                 use_cloop=False,
                 callback=None,
                 lazy=None,
                 schedule=None):
        """
        allow_gc - force the virtual machine to clean up unnecessary
            references, in order to allow garbage collection of
            intermediate values during computation of a function.
            If None, default to the value of the Theano flag allow_gc.

        use_cloop - use the C-based virtual machine if possible

        callback - a callable object to call after each call to a thunk within
            the virtual machine.  It will be called with four arguments named
            'node', 'thunk', 'storage_map', and 'compute_map'.

        lazy - useful only when use_cloop is False. When lazy is None, the
            value of the Theano flag vm.lazy is used. If that is also None
            (the default), lazy evaluation is auto-detected and the
            appropriate version is used. If lazy is True or False, the
            choice between Loop/LoopGC and Stack is forced accordingly.

        """
        # Note: if more parameters are added to __init__, make sure to forward
        # them in the "type(self)(...)" call in the "accept" method below.
        if allow_gc is None:
            allow_gc = config.allow_gc
        self.fgraph = None
        self.allow_gc = allow_gc
        self.use_cloop = use_cloop
        self.callback = callback
        self.lazy = lazy
        self.updated_vars = {}
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None):
        """
        :param fgraph: a VM_Linker can have accepted only one FunctionGraph
            instance at a time.

        :param no_recycling: WRITEME

        :returns: self if fgraph is the first FunctionGraph that has ever been
            associated to self; otherwise, a new VM_Linker associated to fgraph.
        """
        if (config.profile and hasattr(theano, 'sandbox')
                and hasattr(theano.sandbox, 'cuda')
                and theano.sandbox.cuda.cuda_enabled):
            if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
                raise Exception(
                    "You are running the Theano profiler with CUDA enabled."
                    " Theano GPU ops execution is asynchronous by default."
                    " So by default, the profile is useless."
                    " You must set the environment variable"
                    " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                    " synchronize the execution to get a meaningful profile.")

        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            # Build a new VM_Linker, and call accept on that one.
            # Warning: make sure to forward the correct values of
            # all parameters to __init__ here.
            return type(self)(allow_gc=self.allow_gc,
                              use_cloop=self.use_cloop,
                              callback=self.callback,
                              lazy=self.lazy,
                              schedule=self.schedule).accept(
                                  fgraph, no_recycling)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def accept_var_updates(self, updated_vars):
        self.updated_vars = updated_vars
        # This method simply records in the linker which variables have update
        # expressions.  It does not imply that the linker will actually
        # implement these updates (see need_update_inputs).  This mechanism is
        # admittedly confusing, and it could use some cleaning up. The base
        # Linker object should probably go away completely.

    def compute_gc_dependencies(self, variables):
        """
        Return a dict mapping each variable K in `variables` to the list of
        variables [v1, v2, ...] that depend directly on K.

        When we know that none of those dependents will need to be computed,
        we know that:
        * K will not need to be computed
        * if K is already computed, it can be released for garbage collection

        Parameters
        ----------
        variables - iterable over the variables used in a graph computation.

        N.B. gc means garbage collection.
        """
        dependencies = {}
        for k in variables:
            dependencies[k] = []
            # If k has no owner, it is an input / constant and its value
            # should not be removed from the storage_map because we have no
            # way of getting it back.
            #
            # XXX if k has no clients... what is it doing in the computation?
            if k.owner and k.clients:
                ls = []
                for cl in k.clients:
                    if cl[0] != 'output':
                        ls += cl[0].outputs
                dependencies[k] += ls
        return dependencies

    def make_vm(
        self,
        nodes,
        thunks,
        input_storage,
        output_storage,
        storage_map,
        post_thunk_clear,
        computed,
        compute_map,
        updated_vars,
    ):

        pre_call_clear = [storage_map[v] for v in self.no_recycling]

        if (self.callback is not None
                or (config.profile and config.profile_memory)):

            if self.use_cloop and self.callback is not None:
                logger.warning('CVM does not support callback, using Stack VM.')
            if self.use_cloop and config.profile_memory:
                warnings.warn(
                    'CVM does not support memory profile, using Stack VM.')
            # Needed when allow_gc=True and profiling
            deps = self.compute_gc_dependencies(storage_map)
            vm = Stack(nodes,
                       thunks,
                       pre_call_clear,
                       storage_map,
                       compute_map,
                       self.fgraph,
                       self.allow_gc,
                       dependencies=deps,
                       callback=self.callback)
        elif self.use_cloop:
            # create a map from nodes to ints and vars to ints
            nodes_idx = {}
            vars_idx = {}
            for i, node in enumerate(nodes):
                nodes_idx[node] = i
                for v in node.inputs + node.outputs:
                    vars_idx.setdefault(v, len(vars_idx))
            for v in self.fgraph.inputs + self.fgraph.outputs:
                vars_idx.setdefault(v, len(vars_idx))

            nodes_idx_inv = {}
            vars_idx_inv = {}
            for (node, i) in nodes_idx.items():
                nodes_idx_inv[i] = node
            for (var, i) in vars_idx.items():
                vars_idx_inv[i] = var

            # put storage_map and compute_map into an int-based scheme
            n_applies = len(nodes)
            storage_map_list = [
                storage_map[vars_idx_inv[i]] for i in xrange(len(vars_idx_inv))
            ]
            compute_map_list = [
                compute_map[vars_idx_inv[i]] for i in xrange(len(vars_idx_inv))
            ]
            if nodes:
                assert type(storage_map_list[0]) is list
                assert type(compute_map_list[0]) is list

            # Needed when allow_gc=True and profiling
            dependency_map = self.compute_gc_dependencies(storage_map)
            dependency_map_list = [[
                vars_idx[d] for d in dependency_map[vars_idx_inv[i]]
            ] for i in xrange(len(vars_idx_inv))]

            # build the pointers to node inputs and offsets
            base_input_output_list = []
            node_n_inputs = []
            node_n_outputs = []
            node_input_offset = []
            node_output_offset = []
            for node in nodes:
                inputs_idx = [vars_idx[v] for v in node.inputs]
                outputs_idx = [vars_idx[v] for v in node.outputs]
                node_n_inputs.append(len(inputs_idx))
                node_n_outputs.append(len(outputs_idx))
                node_input_offset.append(len(base_input_output_list))
                base_input_output_list.extend(inputs_idx)
                node_output_offset.append(len(base_input_output_list))
                base_input_output_list.extend(outputs_idx)

            # build the var owner array
            var_owner = [None] * len(vars_idx)
            for (var, i) in vars_idx.items():
                if var.owner:
                    var_owner[i] = nodes_idx[var.owner]

            is_lazy_list = [int(th.lazy) for th in thunks]
            output_vars = [vars_idx[v] for v in self.fgraph.outputs]

            # builds the list of prereqs induced by e.g. destroy_handler
            ords = self.fgraph.orderings()
            node_prereqs = []
            node_output_size = []
            for i, node in enumerate(nodes):
                node_output_size.append(0)
                prereq_var_idxs = []
                for prereq_node in ords.get(node, []):
                    prereq_var_idxs.extend(
                        [vars_idx[v] for v in prereq_node.outputs])
                prereq_var_idxs = list(set(prereq_var_idxs))
                prereq_var_idxs.sort()  # TODO: why sort?
                node_prereqs.append(prereq_var_idxs)

            # Builds the list of input storage to update (according to update
            # rules) when the outputs are computed.
            # They are in the same order as the second part of output_vars
            # (output_vars contains first the returned outputs, then the
            # values of the update expressions).
            update_storage = []
            update_in_from_out = {}
            for (ivar, ovar) in updated_vars.items():
                update_in_from_out[vars_idx[ovar]] = vars_idx[ivar]
            for oidx in output_vars:
                if oidx in update_in_from_out:
                    update_storage.append(update_in_from_out[oidx])

            c0 = sys.getrefcount(node_n_inputs)
            vm = CVM(
                nodes,
                thunks,
                pre_call_clear,
                allow_gc=self.allow_gc,
                call_counts=[0] * len(nodes),
                call_times=[0.0] * len(nodes),
                compute_map_list=compute_map_list,
                storage_map_list=storage_map_list,
                base_input_output_list=base_input_output_list,
                node_n_inputs=node_n_inputs,
                node_n_outputs=node_n_outputs,
                node_input_offset=node_input_offset,
                node_output_offset=node_output_offset,
                var_owner=var_owner,
                is_lazy_list=is_lazy_list,
                output_vars=output_vars,
                node_prereqs=node_prereqs,
                node_output_size=node_output_size,
                update_storage=update_storage,
                dependencies=dependency_map_list,
            )
            assert c0 == sys.getrefcount(node_n_inputs)
        else:
            lazy = self.lazy
            if lazy is None:
                lazy = config.vm.lazy
            if lazy is None:
                lazy = not all([(not th.lazy) for th in thunks])
            if not lazy:
                # there is no conditional in the graph
                if self.allow_gc:
                    vm = LoopGC(
                        nodes,
                        thunks,
                        pre_call_clear,
                        post_thunk_clear,
                    )
                else:
                    vm = Loop(
                        nodes,
                        thunks,
                        pre_call_clear,
                    )
            else:
                # Needed when allow_gc=True and profiling
                deps = self.compute_gc_dependencies(storage_map)
                vm = Stack(nodes,
                           thunks,
                           pre_call_clear,
                           storage_map,
                           compute_map,
                           self.fgraph,
                           self.allow_gc,
                           dependencies=deps)
        return vm

    def make_all(
        self,
        profiler=None,
        input_storage=None,
        output_storage=None,
    ):
        fgraph = self.fgraph
        order = self.schedule(fgraph)
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
            fgraph, order, input_storage, output_storage)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = []

        # Collect Reallocation Info
        compute_map_re = defaultdict(lambda: [0])
        for var in fgraph.inputs:
            compute_map_re[var][0] = 1

        if getattr(fgraph.profile, 'dependencies', None):
            dependencies = getattr(fgraph.profile, 'dependencies')
        else:
            dependencies = self.compute_gc_dependencies(storage_map)

        reallocated_info = calculate_reallocate_info(order, fgraph,
                                                     storage_map,
                                                     compute_map_re,
                                                     dependencies)

        for node in order:
            try:
                thunks.append(
                    node.op.make_thunk(node, storage_map, compute_map,
                                       no_recycling))
                if not hasattr(thunks[-1], 'lazy'):
                    # We don't want to force every Op maker to think about
                    # lazy Ops, so if a thunk does not say whether it is lazy,
                    # assume it is not. Without this attribute the VM would
                    # crash later.
                    thunks[-1].lazy = False
            except Exception as e:
                e.args = ("The following error happened while"
                          " compiling the node", node, "\n") + e.args
                raise
        for node, thunk in zip(order, thunks):
            thunk.inputs = [storage_map[v] for v in node.inputs]
            thunk.outputs = [storage_map[v] for v in node.outputs]

        lazy = self.lazy
        if lazy is None:
            lazy = config.vm.lazy
        if lazy is None:
            lazy = not all([(not th.lazy) for th in thunks])
        if not (lazy or (config.profile and config.profile_memory)
                or self.use_cloop or self.callback):
            for pair in reallocated_info.values():
                storage_map[pair[1]] = storage_map[pair[0]]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed) and (input not in fgraph.outputs)
                            and (node == last_user[input])
                            and input not in reallocated_info.keys()):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        vm = self.make_vm(
            order,
            thunks,
            input_storage,
            output_storage,
            storage_map,
            post_thunk_clear,
            computed,
            compute_map,
            self.updated_vars,
        )

        vm.storage_map = storage_map

        return (vm, [
            link.Container(input, storage)
            for input, storage in zip(fgraph.inputs, input_storage)
        ], [
            link.Container(output, storage, True)
            for output, storage in zip(fgraph.outputs, output_storage)
        ], thunks, order)
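
For context on how VM_Linker is typically exercised, a minimal usage sketch follows. It assumes the standard Theano entry points (theano.function and theano.compile.mode.Mode accepting a linker instance); treat it as a sketch rather than a canonical recipe.

import numpy
import theano
import theano.tensor as T
from theano.compile.mode import Mode
from theano.gof.vm import VM_Linker

# Build a mode around an explicit VM_Linker so that the make_all/make_vm
# methods above run one of the Python VMs (Loop/LoopGC or Stack) instead of
# the CVM.
x = T.vector('x')
linker = VM_Linker(allow_gc=True, use_cloop=False, lazy=None)
f = theano.function([x], x * 2, mode=Mode(linker=linker, optimizer='fast_run'))

print(f(numpy.arange(3, dtype=theano.config.floatX)))  # doubles each element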