Example #1
def setup_model(p):
    ladder = LadderAE(p)
    # Setup inputs
    input_type = TensorType('float32',
                            [False] * (len(p.encoder_layers[0]) + 1))
    x_only = input_type('features_unlabeled')
    x = input_type('features_labeled')
    y = theano.tensor.lvector('targets_labeled')
    ladder.apply(x, y, x_only)

    # Load parameters if requested
    if p.get('load_from'):
        trained_params = ojoin(p.load_from, 'trained_params_best.npz')
        #         if not file_exists(trained_params):
        #             trained_params = ojoin(p.load_from, 'trained_params.npz')

        with open(trained_params) as f:
            loaded = numpy.load(f)
            cg = ComputationGraph([ladder.costs.total])
            current_params = VariableFilter(roles=[PARAMETER])(cg.variables)
            logger.info('Loading parameters: %s' % ', '.join(loaded.keys()))
            for param in current_params:
                assert param.get_value().shape == loaded[param.name].shape
                param.set_value(loaded[param.name])

    return ladder
Example #2
    def make_node(self, kern, topgrad, shape=None):
        kern = as_tensor_variable(kern)
        topgrad = as_tensor_variable(topgrad)
        kern, topgrad = self.as_common_dtype(kern, topgrad)
        if self.unshared is True:
            if kern.type.ndim != 6:
                raise TypeError("kern must be 6D tensor")
        else:
            if kern.type.ndim != 4:
                raise TypeError("kern must be 4D tensor")
        if topgrad.type.ndim != 4:
            raise TypeError("topgrad must be 4D tensor")
        if shape is None:
            if self.subsample != (1, 1):
                raise ValueError("shape must be given if subsample != (1, 1)")
            height_width = []
        else:
            height_width = [
                as_tensor_variable(shape[0]).astype("int64"),
                as_tensor_variable(shape[1]).astype("int64"),
            ]

        if self.num_groups > 1:
            broadcastable = [topgrad.type.broadcastable[0], False, False, False]
        else:
            broadcastable = [
                topgrad.type.broadcastable[0],
                kern.type.broadcastable[-3],
                False,
                False,
            ]
        dtype = kern.type.dtype
        return Apply(
            self, [kern, topgrad] + height_width, [TensorType(dtype, broadcastable)()]
        )
Example #3
    def make_node(self, img, topgrad, shape=None):
        img = as_tensor_variable(img)
        topgrad = as_tensor_variable(topgrad)
        img, topgrad = self.as_common_dtype(img, topgrad)
        if img.type.ndim != 5:
            raise TypeError("img must be 5D tensor")
        if topgrad.type.ndim != 5:
            raise TypeError("topgrad must be 5D tensor")
        if shape is None:
            if self.subsample != (1, 1, 1) or self.border_mode == "half":
                raise ValueError(
                    "shape must be given if subsample != (1, 1, 1)"
                    ' or border_mode == "half"')
            height_width_depth = []
        else:
            height_width_depth = [
                as_tensor_variable(shape[0]).astype("int64"),
                as_tensor_variable(shape[1]).astype("int64"),
                as_tensor_variable(shape[2]).astype("int64"),
            ]

        broadcastable = [
            topgrad.type.broadcastable[1],
            img.type.broadcastable[1],
            False,
            False,
            False,
        ]
        dtype = img.type.dtype
        return Apply(
            self,
            [img, topgrad] + height_width_depth,
            [TensorType(dtype, broadcastable)()],
        )
Example #4
        def test_grad_none(self):
            # Check that None is never returned as input gradient
            # when calling self.op.grad
            # We use all values in self.good because this has to be true
            # whether or not the values work for utt.verify_grad.
            if not hasattr(self.op, "grad"):
                # This is not actually an Op
                return

            for testname, inputs in self.good.items():
                inputs = [copy(input) for input in inputs]
                inputrs = [
                    TensorType(
                        dtype=input.dtype,
                        broadcastable=[
                            shape_elem == 1 for shape_elem in input.shape
                        ],
                    )() for input in inputs
                ]

                if isinstance(self.expected,
                              dict) and testname in self.expected:
                    expecteds = self.expected[testname]
                    # with numpy version, when we print a number and read it
                    # back, we don't get exactly the same result, so we accept
                    # rounding error in that case.
                else:
                    expecteds = self.expected(*inputs)
                if not isinstance(expecteds, (list, tuple)):
                    expecteds = (expecteds, )

                out_grad_vars = []
                for out in expecteds:
                    if str(out.dtype) in tensor.discrete_dtypes:
                        dtype = config.floatX
                    else:
                        dtype = str(out.dtype)
                    bcast = [shape_elem == 1 for shape_elem in out.shape]
                    var = TensorType(dtype=dtype, broadcastable=bcast)()
                    out_grad_vars.append(var)

                try:
                    in_grad_vars = self.op.grad(inputrs, out_grad_vars)
                except (gof.utils.MethodNotDefined, NotImplementedError):
                    pass
                else:
                    assert None not in in_grad_vars
Example #5
    def filter(self, data, strict=False, allow_downcast=None):
        if (isinstance(data, gpuarray.GpuArray)
                and data.typecode == self.typecode):
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self, data,
                                type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif (allow_downcast or (allow_downcast is None and type(data) == float
                                 and self.dtype == config.floatX)):
            data = gpuarray.array(data,
                                  dtype=self.typecode,
                                  copy=False,
                                  ndmin=len(self.broadcastable),
                                  context=self.context)
        else:
            if not hasattr(data, 'dtype'):
                converted_data = theano._asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(np.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data
                    data = gpuarray.array(data, context=self.context)

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data,
                                      dtype=self.dtype,
                                      copy=False,
                                      context=self.context)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError(
                "Wrong number of dimensions: expected %s, "
                "got %s with shape %s." % (self.ndim, data.ndim, data.shape),
                data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError(
                    "Non-unit value on shape on a broadcastable"
                    " dimension.", shp, self.broadcastable)
        return data
Example #6
def tensor5(name=None, dtype=None):
    """
    Returns a symbolic 5D tensor variable.
    """
    if dtype is None:
        dtype = theano.config.floatX

    type = TensorType(dtype, ((False,)*5))
    return type(name)
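
A minimal usage sketch, assuming the tensor5 helper above (and its theano/TensorType imports) is in scope; the returned variable behaves like any other symbolic tensor:

# Hypothetical usage of the tensor5 helper defined above.
video = tensor5('video')                   # symbolic 5D tensor, dtype defaults to floatX
assert video.ndim == 5
assert video.broadcastable == (False,) * 5
mean_frame = video.mean(axis=1)            # ordinary tensor ops apply, yielding a 4D result
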
Example #7
def setup_model():
    ladder = LadderAE()
    input_type = TensorType('float32', [False, False])
    x_lb = input_type('features_labeled')
    x_un = input_type('features_unlabeled')
    y = theano.tensor.lvector('targets_labeled')
    ladder.apply(x_lb, x_un, y)

    return ladder
Example #8
    def filter(self, data, strict=False, allow_downcast=None):
        if (isinstance(data, gpuarray.GpuArray) and
                data.typecode == self.typecode):
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self,
                                data, type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif (allow_downcast or
              (allow_downcast is None and
               type(data) == float and
               self.dtype == config.floatX)):
            data = gpuarray.array(data, dtype=self.typecode, copy=False,
                                  ndmin=len(self.broadcastable),
                                  context=self.context)
        else:
            if not hasattr(data, 'dtype'):
                converted_data = theano._asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(numpy.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data
                    data = gpuarray.array(data, context=self.context)

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                      context=self.context)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s, "
                            "got %s with shape %s." % (self.ndim, data.ndim,
                                                       data.shape), data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", shp, self.broadcastable)
        return data
Example #9
def test_filter_strict():
    test_type = TensorType(config.floatX, [])

    with pytest.raises(TypeError):
        test_type.filter(1, strict=True)

    with pytest.raises(TypeError):
        test_type.filter(np.array(1, dtype=int), strict=True)
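
A complementary sketch, assuming the usual Theano import paths: with strict=False and allow_downcast=True the same kinds of values are converted to the type's dtype instead of rejected.

# Sketch of the non-strict path (assumes theano and numpy are installed).
import numpy as np
from theano import config
from theano.tensor import TensorType

test_type = TensorType(config.floatX, [])
value = test_type.filter(1, strict=False, allow_downcast=True)  # converted, not rejected
assert isinstance(value, np.ndarray)
assert value.dtype == config.floatX
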
Example #10
    def make_node(self, img, kern):
        img = as_tensor_variable(img)
        kern = as_tensor_variable(kern)
        img, kern = self.as_common_dtype(img, kern)
        if img.type.ndim != 5:
            raise TypeError("img must be 5D tensor")
        if kern.type.ndim != 5:
            raise TypeError("kern must be 5D tensor")

        broadcastable = [
            img.type.broadcastable[0],
            kern.type.broadcastable[0],
            False,
            False,
            False,
        ]
        dtype = img.type.dtype
        return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()])
Example #11
def _data_to_tensor(data, name=None, cast_floats_to_floatx=True, test=True):
    # TODO:
    ndim = 0 if np.isscalar(data) else data.ndim

    is_dtype = lambda x, dtype: isinstance(x, dtype) or isinstance(
        x, np.ndarray) and x.dtype == dtype

    # Need to also downcast ints to int32 if floatX is float32, otherwise things like int_array.mean() return float64
    # objects, which (a) slows things down and (b) causes an error when you try to update 32-bit shared variables
    # with 64-bit values.

    dtype = \
        theano.config.floatX if (cast_floats_to_floatx and is_dtype(data, float)) else \
        'int32' if (cast_floats_to_floatx and theano.config.floatX == 'float32' and is_dtype(data, int)) else \
        'int64' if isinstance(data, (bool, int)) else \
        'float64' if isinstance(data, float) else \
        'int8' if data.dtype==bool else \
        data.dtype
    tensor = TensorType(dtype, (None, ) * ndim)(name)
    if test:
        tensor.tag.test_value = data.astype(dtype) if isinstance(
            data, np.ndarray) else np.array(data).astype(dtype)
    return tensor
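
A minimal usage sketch, assuming _data_to_tensor above is in scope and NumPy is available as np:

# Hypothetical call: a float64 array becomes a floatX matrix with a test value attached.
x = _data_to_tensor(np.zeros((3, 4)), name='x')
assert x.ndim == 2
assert x.tag.test_value.shape == (3, 4)
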
Example #12
def test_filter_float_subclass():
    """Make sure `TensorType.filter` can handle `float` subclasses."""
    with change_flags(floatX="float64"):
        test_type = TensorType("float64", broadcastable=[])

        nan = np.array([np.nan], dtype="float64")[0]
        assert isinstance(nan, np.float) and not isinstance(nan, np.ndarray)

        filtered_nan = test_type.filter(nan)
        assert isinstance(filtered_nan, np.ndarray)

    with change_flags(floatX="float32"):
        # Try again, except this time `nan` isn't a `float`
        test_type = TensorType("float32", broadcastable=[])

        nan = np.array([np.nan], dtype="float32")[0]
        assert isinstance(nan, np.floating) and not isinstance(nan, np.ndarray)

        filtered_nan = test_type.filter(nan)
        assert isinstance(filtered_nan, np.ndarray)
Example #13
    def make_node(self, rng, size, dtype, *dist_params):
        """Create a random variable node.

        XXX: Unnamed/non-keyword arguments are considered distribution
        parameters!  If you want to set `size`, `rng`, and/or `name`, use their
        keywords.

        Parameters
        ----------
        rng: RandomStateType
            Existing Theano `RandomState` object to be used.  Creates a
            new one, if `None`.
        size: int or Sequence
            Numpy-like size of the output (i.e. replications).
        dtype: Theano dtype
            The dtype of the sampled output.  This value is only used when
            `self.dtype` isn't set.
        dist_params: list
            Distribution parameters.

        Returns
        -------
        out: `Apply`
            A node with inputs `(rng, size, dtype) + dist_args` and outputs
            `(rng_var, out_var)`.

        """
        if size is None:
            size = constant([], dtype="int64")
        elif isinstance(size, int):
            size = as_tensor_variable([size], ndim=1)
        elif not isinstance(size, (np.ndarray, Variable, Sequence)):
            raise TypeError(
                "Parameter size must be None, an integer, or a sequence with integers."
            )
        else:
            size = cast(as_tensor_variable(size, ndim=1), "int64")

        assert size.dtype in int_dtypes

        dist_params = tuple(
            as_tensor_variable(p) if not isinstance(p, Variable) else p
            for p in dist_params
        )

        if rng is None:
            rng = theano.shared(np.random.RandomState())
        elif not isinstance(rng.type, RandomStateType):
            raise TypeError("The type of rng should be an instance of RandomStateType")

        bcast = self.compute_bcast(dist_params, size)
        dtype = self.dtype or dtype

        if dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
            # dtype = tt.scal.upcast(self.dtype, *[p.dtype for p in dist_params])
            raise TypeError("dtype is unspecified")

        if isinstance(dtype, str):
            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
        else:
            dtype_idx = constant(dtype, dtype="int64")
            dtype = all_dtypes[dtype_idx.data]

        outtype = TensorType(dtype=dtype, broadcastable=bcast)
        out_var = outtype()
        inputs = (rng, size, dtype_idx) + dist_params
        outputs = (rng.type(), out_var)

        return Apply(self, inputs, outputs)
Example #14
def _data_to_tensor(data, name=None, cast_to_floatx=True, add_test_value=True):
    """
    Given the numpy data from the first function call, create the appropriate tensors
    :param data: A numpy array, from the first call to the function.
    :param name: Optionally, a name to give the variable.
    :param cast_to_floatx: Cast inputs to the global float type (define this in ~/.theanorc).
        'float': Just cast floats to floatX
        'all': Cast all inputs to floatX
        None: Don't cast anything to floatX
    :param add_test_value: Add test values to your tensor, based on the initial value of the data provided.  Advantage
        of this is it helps you catch and locate shape errors before compiling.  Disadvantage is on large computations
        you have to do an initial pass on CPU, which can be slow.
    :return:
    """
    if isinstance(data, (list, tuple)) and all(
            isinstance(d, np.ndarray) for d in data):
        return tuple(
            _data_to_tensor(d,
                            name=None,
                            cast_to_floatx=cast_to_floatx,
                            add_test_value=add_test_value) for d in data)

    assert cast_to_floatx in (
        'float', 'all',
        None), 'Bad argument for cast_to_floatx: %s' % (cast_to_floatx, )
    ndim = 0 if np.isscalar(data) else data.ndim

    warn_about_floatx = False  # Too many false positives.  Got to find a better way to give this warning.

    if warn_about_floatx:
        if isinstance(data, np.ndarray) and data.dtype in (
                int, bool) and theano.config.floatX == 'float32':
            logging.warn(
                "Your floatX (defined in ~/.theanorc) is float32, but you're passing in integer arrays to your function.  "
                "The problem is that most operations involving a float32 array and an int array result in a float64 array.  So what "
                "may happen is you may get a TypeError telling you that the update must have the same type as the original.  If you "
                "don't that's cool, ignore this.  Otherwise, to fix this problem, you either cast your inputs to floats beforehand, "
                "or compile your symbolic functions with: fcn.compile(cast_to_floatx='all')"
            )

    is_dtype = lambda x, dtype: isinstance(x, dtype) or isinstance(
        x, (np.ndarray, csr_matrix)) and x.dtype == dtype

    # Need to also downcast ints to int32 if floatX is float32, otherwise things like int_array.mean() return float64
    # objects, which (a) slows things down and (b) causes an error when you try to update 32-bit shared variables
    # with 64-bit values.

    dtype = \
        theano.config.floatX if (cast_to_floatx == 'all' or (cast_to_floatx=='float' and is_dtype(data, float))) else \
        'int32' if (cast_to_floatx=='float' and theano.config.floatX == 'float32' and is_dtype(data, int)) else \
        'int64' if isinstance(data, (bool, int)) else \
        'float64' if isinstance(data, float) else \
        'int8' if data.dtype==bool else \
        data.dtype
    if isinstance(data, csr_matrix):
        # Here we make a bunch of hacks to accommodate sparse matrices so that we don't have to change any of our other
        # code when handling them.   This was assembled in haste before a deadline.  Possibly it could be cleaner.  Probably.
        from theano import sparse
        tensor = sparse.csr_matrix(
            name='unnamed' if name is None else name,
            dtype=dtype,
        )
        if add_test_value:
            tensor.tag.test_value = data.astype(theano.config.floatX)
        # Do what theano couldn't and add the dot method to sparse
        def flattenit(var, ndim):
            assert var.indim == ndim, "This is a horrendous hack.  We don't actually flatten, we just check to see if it's the right shape.  It's not.  Also it needs test values on to work."
            return var

        sparse.SparseVariable.flatten = property(
            lambda self: lambda ndim: flattenit(self, ndim))
        sparse.SparseVariable.dot = property(
            lambda self: lambda other: theano.dot(self, other))

    else:
        tensor = TensorType(dtype, (None, ) * ndim)(name)
        if add_test_value:
            tensor.tag.test_value = data.astype(dtype) if isinstance(
                data, np.ndarray) else np.array(data).astype(dtype)
    return tensor
Example #15
        def test_good(self):
            good = self.add_memmap_values(self.good)

            for testname, inputs in good.items():
                inputs = [copy(input) for input in inputs]
                inputrs = [
                    TensorType(
                        dtype=input.dtype,
                        broadcastable=[
                            shape_elem == 1 for shape_elem in input.shape
                        ],
                    )() for input in inputs
                ]
                try:
                    node = safe_make_node(self.op, *inputrs)
                except Exception as exc:
                    err_msg = ("Test %s::%s: Error occurred while"
                               " making a node with inputs %s") % (
                                   self.op, testname, inputs)
                    exc.args += (err_msg, )
                    raise

                try:
                    f = inplace_func(inputrs,
                                     node.outputs,
                                     mode=mode,
                                     name="test_good")
                except Exception as exc:
                    err_msg = ("Test %s::%s: Error occurred while"
                               " trying to make a Function") % (self.op,
                                                                testname)
                    exc.args += (err_msg, )
                    raise
                if isinstance(self.expected,
                              dict) and testname in self.expected:
                    expecteds = self.expected[testname]
                    # with numpy version, when we print a number and read it
                    # back, we don't get exactly the same result, so we accept
                    # rounding error in that case.
                    eps = 5e-9
                else:
                    expecteds = self.expected(*inputs)
                    eps = 1e-10

                if any([
                        i.dtype in ("float32", "int8", "uint8", "uint16")
                        for i in inputs
                ]):
                    eps = 1e-6
                eps = np.max([eps, _eps])

                try:
                    variables = f(*inputs)
                except Exception as exc:
                    err_msg = ("Test %s::%s: Error occurred while calling"
                               " the Function on the inputs %s") % (
                                   self.op, testname, inputs)
                    exc.args += (err_msg, )
                    raise

                if not isinstance(expecteds, (list, tuple)):
                    expecteds = (expecteds, )

                for i, (variable,
                        expected) in enumerate(zip(variables, expecteds)):
                    condition = (variable.dtype != expected.dtype
                                 or variable.shape != expected.shape
                                 or not np.allclose(
                                     variable, expected, atol=eps, rtol=eps))
                    assert not condition, (
                        "Test %s::%s: Output %s gave the wrong"
                        " value. With inputs %s, expected %s (dtype %s),"
                        " got %s (dtype %s). eps=%f"
                        " np.allclose returns %s %s") % (
                            self.op,
                            testname,
                            i,
                            inputs,
                            expected,
                            expected.dtype,
                            variable,
                            variable.dtype,
                            eps,
                            np.allclose(variable, expected, atol=eps,
                                        rtol=eps),
                            np.allclose(variable, expected),
                        )

                for description, check in self.checks.items():
                    assert check(
                        inputs,
                        variables), ("Test %s::%s: Failed check: %s (inputs"
                                     " were %s, outputs were %s)") % (
                                         self.op, testname, description,
                                         inputs, variables)
Example #16
    def filter_inplace(self,
                       data,
                       old_data,
                       strict=False,
                       allow_downcast=None):
        if isinstance(data,
                      gpuarray.GpuArray) and data.typecode == self.typecode:
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError(f"{self} expected a GpuArray object.", data,
                                type(data))
            if self.typecode != data.typecode:
                raise TypeError(
                    f"{self} expected typecode {int(self.typecode)} (dtype {self.dtype}), "
                    f"got {int(data.typecode)} (dtype {data.dtype}).")
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif allow_downcast or (allow_downcast is None and type(data) == float
                                and self.dtype == config.floatX):
            if not isinstance(data, gpuarray.GpuArray):
                data = np.array(data,
                                dtype=self.dtype,
                                copy=False,
                                ndmin=len(self.broadcastable))
            else:
                data = gpuarray.array(
                    data,
                    dtype=self.typecode,
                    copy=False,
                    ndmin=len(self.broadcastable),
                    context=self.context,
                )
        else:
            if not hasattr(data, "dtype"):
                converted_data = _asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(np.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                if not isinstance(data, gpuarray.GpuArray):
                    data = np.array(data, dtype=self.dtype, copy=False)
                else:
                    data = gpuarray.array(data, dtype=self.dtype, copy=False)
            else:
                raise TypeError(
                    f"{self} cannot store a value of dtype {data.dtype} "
                    "without risking loss of precision.")

        if self.ndim != data.ndim:
            raise TypeError(
                f"Wrong number of dimensions: expected {self.ndim}, "
                f"got {data.ndim} with shape {data.shape}.",
                data,
            )
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError(
                    "Non-unit value on shape on a broadcastable"
                    " dimension.",
                    shp,
                    self.broadcastable,
                )
        if not isinstance(data, gpuarray.GpuArray):
            if (old_data is not None and old_data.shape == data.shape and (
                    # write() only works if the destination is contiguous.
                    old_data.flags["C_CONTIGUOUS"]
                    or old_data.flags["F_CONTIGUOUS"])):
                old_data.write(data)
                data = old_data
            else:
                data = pygpu.array(data, context=self.context)
        return data
Example #17
def test_filter_variable():
    test_type = TensorType(config.floatX, [])

    with pytest.raises(TypeError):
        test_type.filter(test_type())
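
For contrast, a sketch of the accepted case (an illustration under the same assumed Theano API, not part of the original test): filter validates concrete values, so a NumPy scalar of the matching dtype passes through, while a symbolic variable, as above, raises TypeError.

import numpy as np
from theano import config
from theano.tensor import TensorType

test_type = TensorType(config.floatX, [])
value = test_type.filter(np.asarray(1.5, dtype=config.floatX))  # concrete value: accepted
assert isinstance(value, np.ndarray) and value.ndim == 0
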