    def test_shared_state2(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')

        f = function([
            x,
            In(a, value=1.0, name='a'),
            In(s, value=0.0, update=s + a * x, mutable=False)
        ], s + a * x)
        g = function(
            [x, In(a, value=1.0, name='a'),
             In(s, value=f.container[s])], s + a * x)

        f(1, 2)
        self.assertTrue(f[s] == 2)
        self.assertTrue(g[s] == 2)
        f(1, 2)
        self.assertTrue(f[s] == 4)
        self.assertTrue(g[s] == 4)
        g(1, 2)  # has no effect on state
        self.assertTrue(f[s] == 4)
        self.assertTrue(g[s] == 4)
    def __init__(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')
        v = T.vector('v')

        self.s = s
        self.x = x
        self.v = v

        self.e = a * x + s

        self.f1 = function([
            x,
            In(a, value=1.0, name='a'),
            In(s, value=0.0, update=s + a * x, mutable=True)
        ], s + a * x)

        self.f2 = function([
            x,
            In(a, value=1.0, name='a'),
            In(s, value=self.f1.container[s], update=s + a * x, mutable=True)
        ], s + a * x)
    def test_weird_names(self):
        a, x, s = T.scalars('xxx')

        checkfor(self, lambda: function([In(a, name=[])], []), TypeError)

        def t():
            f = function([
                In(a, name=set(['adsf', ()]), value=1.0),
                In(x, name=(), value=2.0),
                In(s, name=T.scalar(), value=3.0)
            ], a + x + s)

        checkfor(self, t, TypeError)
Example #4
    def test_naming_rule3(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')

        # x's name is not ignored (as in test_naming_rule2) because a has a default value.
        f = function([x, In(a, value=1.0), s], a / s + x)
        self.assertTrue(f(9, 2, 4) == 9.5)  # can specify all args in order
        self.assertTrue(f(9, 2, s=4) == 9.5)  # can give s as kwarg
        self.assertTrue(f(9, s=4) == 9.25)  # can give s as kwarg, get default a
        self.assertTrue(f(x=9, s=4) == 9.25)  # can give s as kwarg, omit a, x as kw
        checkfor(self, lambda: f(x=9, a=2, s=4), TypeError)  # got unexpected keyword argument 'a'
        checkfor(self, lambda: f(), TypeError)  # takes exactly 3 non-keyword arguments (0 given)
        checkfor(self, lambda: f(x=9), TypeError)  # takes exactly 3 non-keyword arguments (1 given)
    def test_deepcopy_shared_container(self):
        # Ensure that shared containers remain shared after a deep copy.
        a, x = T.scalars('ax')

        h = function([In(a, value=0.0)], a)
        f = function([x, In(a, value=h.container[a], implicit=True)], x + a)

        try:
            memo = {}
            ac = copy.deepcopy(a)
            memo.update({id(a): ac})
            hc = copy.deepcopy(h, memo=memo)
            memo.update({id(h): hc})
            fc = copy.deepcopy(f, memo=memo)
        except NotImplementedError as e:
            if e.args[0].startswith('DebugMode is not picklable'):
                return
            else:
                raise
        h[a] = 1
        hc[ac] = 2
        self.assertTrue(f[a] == 1)
        self.assertTrue(fc[ac] == 2)
Example #6
    def test_shared_state1(self):
        a = tt.scalar()  # the a is for 'anonymous' (un-named).
        x, s = tt.scalars("xs")

        f = function(
            [
                x,
                In(a, value=1.0, name="a"),
                In(s, value=0.0, update=s + a * x, mutable=True),
            ],
            s + a * x,
        )
        g = function(
            [x, In(a, value=1.0, name="a"), In(s, value=f.container[s])], s + a * x
        )

        f(1, 2)
        assert f[s] == 2
        assert g[s] == 2
        f(1, 2)
        g(1, 2)
        assert f[s] == 4
        assert g[s] == 4
Example #7
    def test_in_allow_downcast_vector_floatX(self):
        a = theano.tensor.fvector('a')
        b = theano.tensor.fvector('b')
        c = theano.tensor.fvector('c')

        f = theano.function([
            In(a, allow_downcast=True),
            In(b, allow_downcast=False),
            In(c, allow_downcast=None)
        ], (a + b + c))

        # If the values can be accurately represented, everything is OK
        z = [0]
        assert numpy.all(f(z, z, z) == 0)

        # If allow_downcast is True, idem
        assert numpy.allclose(f([0.1], z, z), 0.1)

        # If allow_downcast is False, nope
        self.assertRaises(TypeError, f, z, [0.1], z)

        # If allow_downcast is None, like False
        self.assertRaises(TypeError, f, z, z, [0.1])
Example #8
    def test_naming_rule4(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')

        f = function([x, In(a, value=1.0, name='a'), s], a / s + x)

        self.assertTrue(f(9, 2, 4) == 9.5)  # can specify all args in order
        self.assertTrue(f(9, 2, s=4) == 9.5)  # can give s as kwarg
        self.assertTrue(f(9, s=4) == 9.25)  # can give s as kwarg, get default a
        self.assertTrue(f(9, a=2, s=4) == 9.5)  # can give a and s as kwargs
        self.assertTrue(f(x=9, a=2, s=4) == 9.5)  # can give all args as kwargs
        self.assertTrue(f(x=9, s=4) == 9.25)  # can omit a, get its default
        checkfor(self, lambda: f(), TypeError)  # takes exactly 3 non-keyword arguments (0 given)
        checkfor(self, lambda: f(5.0, x=9), TypeError)  # got multiple values for keyword argument 'x'
Example #9
    def test_copy(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')

        f = function([x, In(a, value=1.0, name='a'),
                      In(s, value=0.0, update=s + a * x, mutable=True)],
                     s + a * x)

        g = copy.copy(f)
        # if they both return, assume that they return equivalent things.

        self.assertFalse(g.container[x].storage is f.container[x].storage)
        self.assertFalse(g.container[a].storage is f.container[a].storage)
        self.assertFalse(g.container[s].storage is f.container[s].storage)

        self.assertFalse(g.value[a] is not f.value[a])  # should not have been copied
        self.assertFalse(g.value[s] is f.value[s])  # should have been copied because it is mutable.
        self.assertFalse((g.value[s] != f.value[s]).any())  # its contents should be identical

        self.assertTrue(f(2, 1) == g(2))  # they should be in sync, default value should be copied.
        self.assertTrue(f(2, 1) == g(2))  # they should be in sync, default value should be copied.
        f(1, 2)  # put them out of sync
        self.assertFalse(f(1, 2) == g(1, 2))  # they should not be equal anymore.
Example #10
    def test_in_allow_downcast_floatX(self):
        a = theano.tensor.fscalar('a')
        b = theano.tensor.fscalar('b')
        c = theano.tensor.fscalar('c')

        f = theano.function([In(a, allow_downcast=True),
                             In(b, allow_downcast=False),
                             In(c, allow_downcast=None)],
                            (a + b + c))

        # If the values can be accurately represented, everything is OK
        assert numpy.all(f(0, 0, 0) == 0)

        # If allow_downcast is True, idem
        assert numpy.allclose(f(0.1, 0, 0), 0.1)

        # If allow_downcast is False, nope
        self.assertRaises(TypeError, f, 0, 0.1, 0)

        # If allow_downcast is None, it should work iff floatX=float32
        if theano.config.floatX == 'float32':
            assert numpy.allclose(f(0, 0, 0.1), 0.1)
        else:
            self.assertRaises(TypeError, f, 0, 0, 0.1)
    def test_deepcopy_trust_input(self):
        a = T.dscalar()  # the a is for 'anonymous' (un-named).
        x, s = T.dscalars('xs')

        f = function([
            x,
            In(a, value=1.0, name='a'),
            In(s, value=0.0, update=s + a * x, mutable=True)
        ], s + a * x)
        f.trust_input = True
        try:
            g = copy.deepcopy(f)
        except NotImplementedError as e:
            if e.args[0].startswith('DebugMode is not picklable'):
                return
            else:
                raise
        self.assertTrue(f.trust_input is g.trust_input)
        f(np.asarray(2.))
        self.assertRaises((ValueError, AttributeError,
                           theano.compile.debugmode.InvalidValueError), f, 2.)
        g(np.asarray(2.))
        self.assertRaises((ValueError, AttributeError,
                           theano.compile.debugmode.InvalidValueError), g, 2.)
Example #12
    def test_in_update_shared(self):
        # Test that using both In() with updates and shared variables with
        # updates in the same function behaves as expected
        shared_var = theano.shared(1.0)
        a = theano.tensor.dscalar("a")
        a_wrapped = In(a, value=0.0, update=shared_var)
        f = function([a_wrapped], [], updates={shared_var: a}, mode="FAST_RUN")

        # Ensure that, through the executions of the function, the state of
        # the input and the shared variable are appropriate (after N execution,
        # the values have swapped N times). This allows testing that the
        # changes occur at the same time and one doesn't overwrite the other.
        for i in range(5):
            f()
            assert np.allclose(shared_var.get_value(), i % 2)
Example #13
    def test_multiple_functions(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')
        v = T.vector('v')


        # put in some inputs
        list_of_things = [s, x, v]

        # some derived thing, whose inputs aren't all in the list
        list_of_things.append(a * x + s)

        f1 = function([x, In(a, value=1.0, name='a'),
                       In(s, value=0.0, update=s + a * x, mutable=True)],
                      s + a * x)
        list_of_things.append(f1)

        # now put in a function sharing container with the previous one
        f2 = function([x, In(a, value=1.0, name='a'),
                       In(s, value=f1.container[s], update=s + a * x,
                          mutable=True)],
                      s + a * x)
        list_of_things.append(f2)

        assert isinstance(f2.container[s].storage, list)
        assert f2.container[s].storage is f1.container[s].storage

        # now put in a function with non-scalar
        v_value = numpy.asarray([2, 3, 4.], dtype=config.floatX)
        f3 = function([x, In(v, value=v_value)], x + v)
        list_of_things.append(f3)

        # try to pickle the entire list of things
        try:
            saved_format = cPickle.dumps(list_of_things, protocol=-1)
            new_list_of_things = cPickle.loads(saved_format)
        except NotImplementedError as e:
            if e.args[0].startswith('DebugMode is not picklable'):
                return
            else:
                raise
Example #14
    def test_state_access(self):
        a = tt.scalar()  # the a is for 'anonymous' (un-named).
        x, s = tt.scalars("xs")

        f = function(
            [x, In(a, value=1.0, name="a"), In(s, value=0.0, update=s + a * x)],
            s + a * x,
        )

        assert f[a] == 1.0
        assert f[s] == 0.0

        assert f(3.0) == 3.0
        assert f(3.0, a=2.0) == 9.0  # 3.0 + 2*3.0

        assert (
            f[a] == 1.0
        )  # state hasn't changed permanently, we just overrode it last line
        assert f[s] == 9.0

        f[a] = 5.0
        assert f[a] == 5.0
        assert f(3.0) == 24.0  # 9 + 3*5
        assert f[s] == 24.0
Example #15
    def test_weird_names(self):
        a, x, s = tt.scalars("xxx")

        checkfor(self, lambda: function([In(a, name=[])], []), TypeError)

        def t():
            f = function(
                [
                    In(a, name={"adsf", ()}, value=1.0),
                    In(x, name=(), value=2.0),
                    In(s, name=tt.scalar(), value=3.0),
                ],
                a + x + s,
            )
            return f

        checkfor(self, t, TypeError)
Example #16
def pfunc(
    params,
    outputs=None,
    mode=None,
    updates=None,
    givens=None,
    no_default_updates=False,
    accept_inplace=False,
    name=None,
    rebuild_strict=True,
    allow_input_downcast=None,
    profile=None,
    on_unused_input=None,
    output_keys=None,
):
    """
    Function-constructor for graphs with shared variables.

    Parameters
    ----------
    params : list of either Variable or In instances
        Function parameters; these are not allowed to be shared variables.
    outputs : list of Variables or Out instances
        Expressions to compute.
    mode : string or `theano.compile.Mode` instance
        Compilation mode.
    updates : iterable over pairs (shared_variable, new_expression); list, tuple, or dict
        Update the values of SharedVariable inputs according to these
        expressions.
    givens : iterable over pairs (Var1, Var2) of Variables; list, tuple, or dict
        Specific substitutions to make in the computation graph (Var2
        replaces Var1). The Var1 and Var2 in each pair must have the same
        Type.
    no_default_updates : either bool or list of Variables
        If True, do not perform any automatic update on Variables.
        If False (default), perform them all. Else, perform automatic updates
        on all Variables that are neither in "updates" nor in
        "no_default_updates".
    accept_inplace : bool
        True iff the graph can contain inplace operations prior to the
        optimization phase (default is False). *Note* this parameter is unsupported,
        and its use is not recommended.
    name : None or string
        Attaches a name to the profiling result of this function.
    allow_input_downcast : bool
        True means that values passed as inputs when calling the function
        can be silently downcast to fit the dtype of the corresponding
        Variable, which may lose precision. False means that a value will
        only be cast to a more general or more precise type. None (default)
        is almost like False, but allows downcasting of Python float scalars
        to floatX.
    profile : None, True, str, or ProfileStats instance
        Accumulate profiling information into a given ProfileStats instance.
        None is the default, and means to use the value of config.profile.
        If argument is `True` then a new ProfileStats instance will be used.
        If argument is a string, a new ProfileStats instance will be created
        with that string as its `message` attribute. This profiling object will
        be available via self.profile.
    on_unused_input : {'raise', 'warn', 'ignore', None}
        What to do if a variable in the 'inputs' list is not used in the graph.

    Returns
    -------
    theano.compile.Function
        A callable object that will compute the outputs (given the inputs) and
        update the implicit function arguments according to the `updates`.

    Notes
    -----
    Regarding givens: be careful to make sure that these substitutions are
    independent; the behaviour when the Var1 of one pair appears in the graph
    leading to the Var2 of another pair is undefined. Replacements specified
    with givens are different from optimizations in that Var2 is not expected
    to be equivalent to Var1.

    """
    #
    # This function works by cloning the graph (except for the
    # inputs), and then shipping it off to compile.function (There it
    # will be cloned again, unnecessarily, because it doesn't know
    # that we already cloned it.)
    #
    # First, it clones the replacements named in the givens argument,
    # and points each Var1 to the clone of Var2.  Then it sets the
    # inputs in the clone dictionary.  After these steps, we are
    # assuming that the clone dictionary contains all the inputs to
    # the computation graph.
    #
    # Then it clones the outputs and the update expressions.  This
    # rebuilds a computation graph from the inputs and the givens.
    #
    if updates is None:
        updates = []
    if givens is None:
        givens = []
    if profile is None:
        profile = config.profile or config.print_global_stats
        # profile -> True or False
        if profile is False:
            profile = None
    if profile is True:
        profile = ProfileStats(message=name)
        # profile -> object
    elif type(profile) == str:
        profile = ProfileStats(message=profile)
    # At this point, profile is typically None, False, or a ProfileStats
    # object. Other objects are deliberately allowed to pass through, as
    # they might be useful.

    if not isinstance(params, (list, tuple)):
        raise Exception("in pfunc() the first argument must be a list or a tuple")

    if not isinstance(no_default_updates, bool) and not isinstance(
        no_default_updates, list
    ):
        raise TypeError("no_default_updates should be either a boolean or a list")

    if len(updates) > 0 and any(
        isinstance(v, Variable) for v in iter_over_pairs(updates)
    ):
        raise ValueError(
            "The updates parameter must be an OrderedDict/dict or a list of "
            "lists/tuples with 2 elements"
        )

    # transform params into theano.compile.In objects.
    inputs = [
        _pfunc_param_to_in(p, allow_downcast=allow_input_downcast) for p in params
    ]

    # Check if some variable is present more than once in inputs
    in_variables = [input.variable for input in inputs]
    for i, v in enumerate(in_variables):
        if v in in_variables[(i + 1) :]:
            dup_v_i = in_variables.index(v, (i + 1))
            raise UnusedInputError(
                "Variable %s is used twice in inputs to theano.function, "
                "at indices %i and %i.  This would result in values "
                "provided for it being ignored. Please do not duplicate "
                "variables in the inputs list." % (v, i, dup_v_i)
            )

    # Check that we are not using `givens` to replace input variables, because
    # this typically does nothing, contrary to what one may expect.
    in_var_set = set(in_variables)
    try:
        givens_pairs = list(givens.items())
    except AttributeError:
        givens_pairs = givens
    for x, y in givens_pairs:
        if x in in_var_set:
            raise RuntimeError(
                "You are trying to replace variable '%s' through the "
                "`givens` parameter, but this variable is an input to your "
                "function. Replacing inputs is currently forbidden because it "
                "has no effect. One way to modify an input `x` to a function "
                "evaluating f(x) is to define a new input `y` and use "
                "`theano.function([y], f(x), givens={x: g(y)})`. Another "
                "solution consists in using `theano.clone`, e.g. like this: "
                "`theano.function([x], "
                "theano.clone(f(x), replace={x: g(x)}))`." % x
            )

    # Extend the outputs with the updates on input variables so they are also
    # cloned
    additional_outputs = [i.update for i in inputs if i.update]
    if outputs is None:
        out_list = []
    else:
        if isinstance(outputs, (list, tuple)):
            out_list = list(outputs)
        else:
            out_list = [outputs]
    extended_outputs = out_list + additional_outputs

    output_vars = rebuild_collect_shared(
        extended_outputs,
        in_variables,
        replace=givens,
        updates=updates,
        rebuild_strict=rebuild_strict,
        copy_inputs_over=True,
        no_default_updates=no_default_updates,
    )
    # extracting the arguments
    input_variables, cloned_extended_outputs, other_stuff = output_vars
    clone_d, update_d, update_expr, shared_inputs = other_stuff

    # Recover only the clones of the original outputs
    if outputs is None:
        cloned_outputs = []
    else:
        if isinstance(outputs, (list, tuple)):
            cloned_outputs = cloned_extended_outputs[: len(outputs)]
        else:
            cloned_outputs = cloned_extended_outputs[0]

    for i, iv in zip(inputs, input_variables):
        i.variable = iv

        # If needed, replace the input's update by its cloned equivalent
        if i.update:
            i.update = clone_d[i.update]

    for sv in shared_inputs:
        # The shared variable's container is passed as the value, so its
        # storage is shared with the function. The container ends up in the
        # resulting function's defaults list, but since shared variables
        # never need to be re-initialized from defaults, it is never read
        # from there.
        if sv in update_d:
            si = In(
                variable=sv,
                value=sv.container,
                mutable=True,
                borrow=True,
                update=update_d[sv],
                shared=True,
            )
        else:
            si = In(
                variable=sv, value=sv.container, mutable=False, borrow=True, shared=True
            )
        inputs.append(si)

    return orig_function(
        inputs,
        cloned_outputs,
        mode,
        accept_inplace=accept_inplace,
        name=name,
        profile=profile,
        on_unused_input=on_unused_input,
        output_keys=output_keys,
    )
def fn():
    x, s = T.scalars('xs')
    function([In(x, update=((s * s) + x))], x)

def fn():
    x, s = T.scalars('xs')
    function([In(x, update=s + x)], x)
Example #19
def t():
    f = function([
        In(a, name=set(['adsf', ()]), value=1.0),
        In(x, name=(), value=2.0),
        In(s, name=T.scalar(), value=3.0)
    ], a + x + s)
Example #20
    def __init__(self, num_hidden, num_classes, context_win_size, embeddings,
                 featdim=0, fine_tuning=False, truncate_gradient=-1):
        """
        num_hidden :: dimension of the hidden layer
        num_classes :: number of classes
        context_win_size :: word window context size
        embeddings :: matrix
        """
        # hyper parameters of the model

        self.hyperparams = {}

        # nh :: dimension of the hidden layer
        nh = num_hidden
        self.hyperparams['nh'] = nh

        # nc :: number of classes
        nc = num_classes
        self.hyperparams['nc'] = nc

        # de :: dimension of the word embeddings
        de = embeddings.shape[1]
        self.hyperparams['de'] = de

        # cs :: word window context size
        cs = context_win_size
        self.hyperparams['cs'] = cs

        self.hyperparams['featdim'] = featdim
        self.hyperparams['fine_tuning'] = fine_tuning
        self.hyperparams['truncate_gradient'] = truncate_gradient

        # parameters of the model
        self.emb = theano.shared(embeddings.astype(theano.config.floatX))

        # inputs
        idxs = T.imatrix()
        w = T.fscalar('w')
        x = self.emb[idxs].reshape((idxs.shape[0], de * cs))*w
        y = T.iscalar('y')
        y_sentence = T.ivector('y_sentence')
        f = T.matrix('f')
        f.reshape((idxs.shape[0], featdim))
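        # NOTE: Variable.reshape returns a new variable rather than reshaping
        # f in place, so the line above has no effect as written; presumably
        # f = f.reshape(...) was intended.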

        # forward parameters of the model
        self.fWx = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                                                         (de * cs, nh)).astype(theano.config.floatX))

        self.fWh = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                                                         (nh, nh)).astype(theano.config.floatX))

        self.fbh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

        self.fh0 = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

        fparams = [self.fWx, self.fWh, self.fbh, self.fh0]
        fnames = ['fWx', 'fWh', 'fbh', 'fh0']

        def frecurrence(x_t, h_tm1):
            h_t = T.nnet.sigmoid(T.dot(x_t, self.fWx) + T.dot(h_tm1, self.fWh) + self.fbh)
            return h_t

        fh, _ = theano.scan(fn=frecurrence,
                            sequences=x,
                            outputs_info=[self.fh0],
                            n_steps=x.shape[0],
                            truncate_gradient=truncate_gradient)

        # backwards parameters of the model
        self.bWx = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                                                         (de * cs, nh)).astype(theano.config.floatX))

        self.bWh = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                                                         (nh, nh)).astype(theano.config.floatX))

        self.bbh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

        self.bh0 = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

        bparams = [self.bWx, self.bWh, self.bbh, self.bh0]
        bnames = ['bWx', 'bWh', 'bbh', 'bh0']

        def brecurrence(x_t, h_tm1):
            h_t = T.nnet.sigmoid(T.dot(x_t, self.bWx) + T.dot(h_tm1, self.bWh) + self.bbh)
            return h_t

        bh, _ = theano.scan(fn=brecurrence,
                            sequences=x,
                            outputs_info=[self.bh0],
                            n_steps=x.shape[0],
                            go_backwards=True,
                            truncate_gradient=truncate_gradient)

        # inverting backwards hidden
        bh = bh[::-1]

        # concatenation parameters
        self.bW = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                                                        (nh+featdim, nc)).astype(theano.config.floatX))

        self.fW = theano.shared(0.2 * np.random.uniform(-1.0, 1.0,
                                                        (nh+featdim, nc)).astype(theano.config.floatX))

        self.b = theano.shared(np.zeros(nc, dtype=theano.config.floatX))

        # adding features
        if featdim > 0:
            fh_final = T.concatenate([fh, f], axis=1)
            bh_final = T.concatenate([bh, f], axis=1)
        else:
            fh_final = fh
            bh_final = bh

        # "concatenating" forward and backward hidden states
        h = T.dot(bh_final, self.bW) + T.dot(fh_final, self.fW)

        s = T.nnet.softmax(h + self.b)

        p_y_given_x_lastword = s[-1, :]
        p_y_given_x_sentence = s

        self.params = fparams + bparams + [self.bW, self.fW, self.b]
        self.names = fnames + bnames + ['bW', 'fW', 'b']

        if fine_tuning:
            self.params.append(self.emb)
            self.names.append("emb")

        # prediction
        y_pred = T.argmax(p_y_given_x_sentence, axis=1)

        # cost functions
        sentence_nll = -T.mean(T.log(p_y_given_x_sentence)
                               [T.arange(x.shape[0]), y_sentence])

        nll = -T.mean(T.log(p_y_given_x_lastword)[y])

        # gradients
        sentence_gradients = T.grad(sentence_nll, self.params)
        gradients = T.grad(nll, self.params)

        # learning rate
        lr = T.scalar('lr')

        # updates
        sentence_updates = OrderedDict((p, p - lr * g)
                                       for p, g in
                                       zip(self.params, sentence_gradients))

        updates = OrderedDict((p, p - lr * g)
                              for p, g in
                              zip(self.params, gradients))

        # theano functions
        self.classify = theano.function(inputs=[idxs, f, In(w, value=1.0)],
                                        outputs=y_pred,
                                        on_unused_input='ignore')

        self.sentence_train = theano.function(inputs=[idxs, f, y_sentence, lr, In(w, value=1.0)],
                                              outputs=sentence_nll,
                                              updates=sentence_updates,
                                              on_unused_input='ignore')

        self.train = theano.function(inputs=[idxs, f, y, lr, In(w, value=1.0)],
                                     outputs=nll,
                                     updates=updates,
                                     on_unused_input='ignore')

        self.predict = theano.function(inputs=[idxs, f, In(w, value=1.0)],
                                       outputs=p_y_given_x_sentence,
                                       on_unused_input='ignore')

        self.normalize = theano.function(
            inputs=[],
            updates={self.emb: self.emb / T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
Example #21
    def test_in_shared_variable(self):
        # Ensure that an error is raised if In is used to wrap
        # a shared variable
        a = theano.shared(1.0)
        a_wrapped = In(a, update=a + 1)
        self.assertRaises(TypeError, theano.function, [a_wrapped])
Example #22
def fn():
    x, s = T.scalars('xs')
    fn = function([In(x, update=mul(s, s) + x)], x)
Example #23
    def __init__(self,
                 num_hidden,
                 num_classes,
                 context_win_size,
                 embeddings,
                 featdim=0,
                 fine_tuning=False,
                 truncate_gradient=-1):
        """
        num_hidden :: dimension of the hidden layer
        num_classes :: number of classes
        embeddings :: matrix
        featdim :: size of the features
        """
        # hyper parameters of the model
        self.hyperparams = {}

        # nh :: dimension of the hidden layer
        nh = num_hidden
        self.hyperparams['nh'] = nh

        # nc :: number of classes
        nc = num_classes
        self.hyperparams['nc'] = nc

        # de :: dimension of the word embeddings
        de = embeddings.shape[1]
        self.hyperparams['de'] = de

        # cs :: word window context size
        cs = context_win_size
        self.hyperparams['cs'] = cs

        self.hyperparams['featdim'] = featdim
        self.hyperparams['fine_tuning'] = fine_tuning
        self.hyperparams['truncate_gradient'] = truncate_gradient

        # add one for PADDING at the end
        self.emb = theano.shared(embeddings.astype(theano.config.floatX))

        n_in = de * cs
        n_hidden = n_i = n_c = n_o = n_f = nh
        n_y = nc

        idxs = T.imatrix()
        w = T.fscalar('w')
        # as many columns as context window size/lines as words in the sentence
        x = self.emb[idxs].reshape((idxs.shape[0], de * cs)) * w
        f = T.matrix('f')
        f.reshape((idxs.shape[0], featdim))
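        # NOTE: Variable.reshape returns a new variable rather than reshaping
        # f in place, so the line above has no effect as written; presumably
        # f = f.reshape(...) was intended.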
        y = T.iscalar('y')  # label
        y_sentence = T.ivector('y_sentence')

        # forward weights

        self.fW_xi = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_i)).astype(dtype))

        self.fW_hi = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_i)).astype(dtype))

        self.fW_ci = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_c, n_i)).astype(dtype))

        self.fb_i = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_i)))

        self.fW_xf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_f)).astype(dtype))

        self.fW_hf = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_f)).astype(dtype))

        self.fW_cf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_c, n_f)).astype(dtype))

        self.fb_f = theano.shared(np.cast[dtype](uniform(0, 1., size=n_f)))

        self.fW_xc = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_c)).astype(dtype))

        self.fW_hc = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_c)).astype(dtype))

        self.fb_c = theano.shared(np.zeros(n_c, dtype=dtype))

        self.fW_xo = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_o)).astype(dtype))

        self.fW_ho = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_o)).astype(dtype))

        self.fW_co = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_c, n_o)).astype(dtype))

        self.fb_o = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_o)))

        self.fc0 = theano.shared(np.zeros(n_hidden, dtype=dtype))

        self.fh0 = T.tanh(self.fc0)

        fparams = [
            self.fW_xi, self.fW_hi, self.fW_ci, self.fb_i, self.fW_xf,
            self.fW_hf, self.fW_cf, self.fb_f, self.fW_xc, self.fW_hc,
            self.fb_c, self.fW_xo, self.fW_ho, self.fW_co, self.fb_o, self.fc0
        ]

        fnames = [
            'fW_xi', 'fW_hi', 'fW_ci', 'fb_i', 'fW_xf', 'fW_hf', 'fW_cf',
            'fb_f', 'fW_xc', 'fW_hc', 'fb_c', 'fW_xo', 'fW_ho', 'fW_co',
            'fb_o', 'fc0'
        ]

        def frecurrence(x_t, h_tm1, c_tm1):
            i_t = sigma(
                theano.dot(x_t, self.fW_xi) + theano.dot(h_tm1, self.fW_hi) +
                theano.dot(c_tm1, self.fW_ci) + self.fb_i)

            f_t = sigma(
                theano.dot(x_t, self.fW_xf) + theano.dot(h_tm1, self.fW_hf) +
                theano.dot(c_tm1, self.fW_cf) + self.fb_f)

            c_t = f_t * c_tm1 + i_t * T.tanh(
                theano.dot(x_t, self.fW_xc) + theano.dot(h_tm1, self.fW_hc) +
                self.fb_c)

            o_t = sigma(
                theano.dot(x_t, self.fW_xo) + theano.dot(h_tm1, self.fW_ho) +
                theano.dot(c_t, self.fW_co) + self.fb_o)

            h_t = o_t * T.tanh(c_t)

            return [h_t, c_t]

        [
            fh,
            _,
        ], _ = theano.scan(fn=frecurrence,
                           sequences=[x],
                           outputs_info=[self.fh0, self.fc0],
                           n_steps=x.shape[0],
                           truncate_gradient=truncate_gradient)

        # backward weights

        self.bW_xi = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_i)).astype(dtype))

        self.bW_hi = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_i)).astype(dtype))

        self.bW_ci = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_c, n_i)).astype(dtype))

        self.bb_i = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_i)))

        self.bW_xf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_f)).astype(dtype))

        self.bW_hf = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_f)).astype(dtype))

        self.bW_cf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_c, n_f)).astype(dtype))

        self.bb_f = theano.shared(np.cast[dtype](uniform(0, 1., size=n_f)))

        self.bW_xc = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_c)).astype(dtype))

        self.bW_hc = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_c)).astype(dtype))

        self.bb_c = theano.shared(np.zeros(n_c, dtype=dtype))

        self.bW_xo = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_in, n_o)).astype(dtype))

        self.bW_ho = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden, n_o)).astype(dtype))

        self.bW_co = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                 (n_c, n_o)).astype(dtype))

        self.bb_o = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_o)))

        self.bc0 = theano.shared(np.zeros(n_hidden, dtype=dtype))

        self.bh0 = T.tanh(self.bc0)

        bparams = [
            self.bW_xi, self.bW_hi, self.bW_ci, self.bb_i, self.bW_xf,
            self.bW_hf, self.bW_cf, self.bb_f, self.bW_xc, self.bW_hc,
            self.bb_c, self.bW_xo, self.bW_ho, self.bW_co, self.bb_o, self.bc0
        ]

        bnames = [
            'bW_xi', 'bW_hi', 'bW_ci', 'bb_i', 'bW_xf', 'bW_hf', 'bW_cf',
            'bb_f', 'bW_xc', 'bW_hc', 'bb_c', 'bW_xo', 'bW_ho', 'bW_co',
            'bb_o', 'bc0'
        ]

        def brecurrence(x_t, h_tm1, c_tm1):
            i_t = sigma(
                theano.dot(x_t, self.bW_xi) + theano.dot(h_tm1, self.bW_hi) +
                theano.dot(c_tm1, self.bW_ci) + self.bb_i)

            f_t = sigma(
                theano.dot(x_t, self.bW_xf) + theano.dot(h_tm1, self.bW_hf) +
                theano.dot(c_tm1, self.bW_cf) + self.bb_f)

            c_t = f_t * c_tm1 + i_t * T.tanh(
                theano.dot(x_t, self.bW_xc) + theano.dot(h_tm1, self.bW_hc) +
                self.bb_c)

            o_t = sigma(
                theano.dot(x_t, self.bW_xo) + theano.dot(h_tm1, self.bW_ho) +
                theano.dot(c_t, self.bW_co) + self.bb_o)

            h_t = o_t * T.tanh(c_t)

            return [h_t, c_t]

        [
            bh,
            _,
        ], _ = theano.scan(fn=brecurrence,
                           sequences=[x],
                           outputs_info=[self.bh0, self.bc0],
                           n_steps=x.shape[0],
                           go_backwards=True,
                           truncate_gradient=truncate_gradient)

        # concatenation weights

        self.bW = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden + featdim, n_y)).astype(dtype))

        self.fW = theano.shared(
            0.2 * uniform(-1.0, 1.0, (n_hidden + featdim, n_y)).astype(dtype))

        self.b = theano.shared(np.zeros(n_y, dtype=dtype))

        # reversing backwards hidden
        bh = bh[::-1]

        # adding features
        if featdim > 0:
            fh_final = T.concatenate([fh, f], axis=1)
            bh_final = T.concatenate([bh, f], axis=1)
        else:
            fh_final = fh
            bh_final = bh

        # "concatenating" hidden states
        h = T.dot(bh_final, self.bW) + T.dot(fh_final, self.fW)

        s = T.nnet.softmax(h + self.b)

        p_y_given_x_lastword = s[-1, :]
        p_y_given_x_sentence = s

        # params and names
        self.params = fparams + bparams + [self.fW, self.bW, self.b]
        self.names = fnames + bnames + ["fW", "bW", "b"]

        if fine_tuning:
            self.params.append(self.emb)
            self.names.append("embeddings")

        y_pred = T.argmax(p_y_given_x_sentence, axis=1)

        # learning rate
        lr = T.scalar('lr')

        # cost functions
        sentence_nll = -T.mean(
            T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y_sentence])

        nll = -T.mean(T.log(p_y_given_x_lastword)[y])

        # gradients
        gradients = T.grad(nll, self.params)
        sentence_gradients = T.grad(sentence_nll, self.params)

        # updates
        updates = OrderedDict(
            (p, p - lr * g) for p, g in zip(self.params, gradients))

        sentence_updates = OrderedDict(
            (p, p - lr * g) for p, g in zip(self.params, sentence_gradients))

        # theano functions
        self.classify = theano.function(inputs=[idxs, f,
                                                In(w, value=1.0)],
                                        outputs=y_pred,
                                        on_unused_input='ignore')

        self.train = theano.function(inputs=[idxs, f, y, lr,
                                             In(w, value=1.0)],
                                     outputs=nll,
                                     updates=updates,
                                     on_unused_input='ignore')

        self.sentence_train = theano.function(
            inputs=[idxs, f, y_sentence, lr,
                    In(w, value=1.0)],
            outputs=sentence_nll,
            updates=sentence_updates,
            on_unused_input='ignore')

        self.predict = theano.function(inputs=[idxs, f,
                                               In(w, value=1.0)],
                                       outputs=p_y_given_x_sentence,
                                       on_unused_input='ignore')

        self.normalize = theano.function(
            inputs=[],
            updates={
                self.emb:
                self.emb / T.sqrt(
                    (self.emb**2).sum(axis=1)).dimshuffle(0, 'x')
            })
Example #24
def fn():
    x, s = tt.scalars("xs")
    function([In(x, update=((s * s) + x))], x)
Example #25
    def __init__(self, num_hidden, num_classes, context_win_size,
                 embeddings, featdim=0, fine_tuning=False, truncate_gradient=-1):
        """
        num_hidden :: dimension of the hidden layer
        num_classes :: number of classes
        context_win_size :: word window context size
        embeddings :: matrix
        featdim :: size of the features
        """
        # hyper parameters of the model

        self.hyperparams = {}

        # nh :: dimension of the hidden layer
        nh = num_hidden
        self.hyperparams['nh'] = nh

        # nc :: number of classes
        nc = num_classes
        self.hyperparams['nc'] = nc

        # de :: dimension of the word embeddings
        de = embeddings.shape[1]
        self.hyperparams['de'] = de

        # cs :: word window context size
        cs = context_win_size
        self.hyperparams['cs'] = cs

        self.hyperparams['featdim'] = featdim
        self.hyperparams['fine_tuning'] = fine_tuning
        self.hyperparams['truncate_gradient'] = truncate_gradient

        # parameters of the model
        self.emb = theano.shared(embeddings.astype(theano.config.floatX))

        # weights for LSTM
        n_in = de * cs
        n_hidden = n_i = n_c = n_o = n_f = nh
        n_y = nc

        self.W_xi = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_in, n_i)).astype(dtype))
        self.W_hi = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_hidden, n_i)).astype(dtype))
        self.W_ci = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_c, n_i)).astype(dtype))
        self.b_i = theano.shared(np.cast[dtype](uniform(-0.5, .5,
                                                        size=n_i)))
        self.W_xf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_in, n_f)).astype(dtype))
        self.W_hf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_hidden, n_f)).astype(dtype))
        self.W_cf = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_c, n_f)).astype(dtype))
        self.b_f = theano.shared(np.cast[dtype](uniform(0, 1.,
                                                        size=n_f)))
        self.W_xc = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_in, n_c)).astype(dtype))
        self.W_hc = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_hidden, n_c)).astype(dtype))
        self.b_c = theano.shared(np.zeros(n_c, dtype=dtype))
        self.W_xo = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_in, n_o)).astype(dtype))
        self.W_ho = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_hidden, n_o)).astype(dtype))
        self.W_co = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_c, n_o)).astype(dtype))
        self.b_o = theano.shared(np.cast[dtype](uniform(-0.5, .5, size=n_o)))
        self.W_hy = theano.shared(0.2 * uniform(-1.0, 1.0,
                                                (n_hidden + featdim, n_y)).astype(dtype))
        self.b_y = theano.shared(np.zeros(n_y, dtype=dtype))
        self.c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
        self.h0 = T.tanh(self.c0)

        # bundle weights
        self.params = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                       self.W_xf, self.W_hf, self.W_cf, self.b_f, self.W_xc,
                       self.W_hc, self.b_c, self.W_xo, self.W_ho, self.W_co,
                       self.b_o, self.W_hy, self.b_y, self.c0]

        self.names = ['W_xi', 'W_hi', 'W_ci', 'b_i', 'W_xf',
                      'W_hf', 'W_cf', 'b_f', 'W_xc', 'W_hc', 'b_c', 'W_xo',
                      'W_ho', 'W_co', 'b_o', 'W_hy', 'b_y', 'c0']

        if fine_tuning:
            self.params.append(self.emb)
            self.names.append("embeddings")

        idxs = T.imatrix()
        w = T.fscalar('w')
        # as many columns as context window size/lines as words in the sentence
        x = self.emb[idxs].reshape((idxs.shape[0], de * cs))*w
        f = T.matrix('f')
        f.reshape((idxs.shape[0], featdim))
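        # NOTE: Variable.reshape returns a new variable rather than reshaping
        # f in place, so the line above has no effect as written; presumably
        # f = f.reshape(...) was intended.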
        y = T.iscalar('y')  # label
        y_sentence = T.ivector('y_sentence')

        def recurrence(x_t, feat_t, h_tm1, c_tm1):
            i_t = sigma(theano.dot(x_t, self.W_xi)
                        + theano.dot(h_tm1, self.W_hi)
                        + theano.dot(c_tm1, self.W_ci)
                        + self.b_i)

            f_t = sigma(theano.dot(x_t, self.W_xf)
                        + theano.dot(h_tm1, self.W_hf)
                        + theano.dot(c_tm1, self.W_cf)
                        + self.b_f)

            c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, self.W_xc)
                                             + theano.dot(h_tm1, self.W_hc)
                                             + self.b_c)

            o_t = sigma(theano.dot(x_t, self.W_xo)
                        + theano.dot(h_tm1, self.W_ho)
                        + theano.dot(c_t, self.W_co)
                        + self.b_o)

            h_t = o_t * T.tanh(c_t)

            if featdim > 0:
                all_t = T.concatenate([h_t, feat_t])
            else:
                all_t = h_t

            s_t = softmax(theano.dot(all_t, self.W_hy) + self.b_y)

            return [h_t, c_t, s_t]

        [h, _, s], _ = theano.scan(fn=recurrence,
                                   sequences=[x, f],
                                   outputs_info=[self.h0, self.c0, None],
                                   n_steps=x.shape[0],
                                   truncate_gradient=truncate_gradient)

        p_y_given_x_lastword = s[-1, 0, :]
        p_y_given_x_sentence = s[:, 0, :]
        y_pred = T.argmax(p_y_given_x_sentence, axis=1)

        # cost and gradients and learning rate
        lr = T.scalar('lr')

        # cost functions
        sentence_nll = -T.mean(T.log(p_y_given_x_sentence)
                               [T.arange(x.shape[0]), y_sentence])
        nll = -T.mean(T.log(p_y_given_x_lastword)[y])

        # gradients
        sentence_gradients = T.grad(sentence_nll, self.params)
        gradients = T.grad(nll, self.params)

        # updates
        updates = OrderedDict((p, p - lr * g)
                              for p, g in zip(self.params, gradients))

        sentence_updates = OrderedDict((p, p - lr * g)
                                       for p, g in
                                       zip(self.params, sentence_gradients))

        # theano functions
        self.classify = theano.function(inputs=[idxs, f, In(w, value=1.0)],
                                        outputs=y_pred)

        self.sentence_train = theano.function(inputs=[idxs, f, y_sentence, lr, In(w, value=1.0)],
                                              outputs=sentence_nll,
                                              updates=sentence_updates)

        self.train = theano.function(inputs=[idxs, f, y, lr, In(w, value=1.0)],
                                     outputs=nll,
                                     updates=updates)

        self.predict = theano.function(inputs=[idxs, f, In(w, value=1.0)],
                                       outputs=p_y_given_x_sentence)

        self.normalize = theano.function(inputs=[],
                                         updates={self.emb:
                                                  self.emb / T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
Example #26
    def test_random_function_ndim_added(self):
        # Test that random_function helper function accepts ndim_added as
        # keyword argument
        # If using numpy's uniform distribution, ndim_added should be 0,
        # because the shape provided as argument is the output shape.
        # Specifying a different ndim_added will change the Op's output ndim,
        # so np.uniform will produce a result of incorrect shape,
        # and a ValueError should be raised.
        def ndim_added_deco(ndim_added):
            def randomfunction(random_state,
                               size=(),
                               low=0.0,
                               high=0.0,
                               ndim=None):
                ndim, size, bcast = raw_random._infer_ndim_bcast(ndim, size)
                if ndim_added < 0:
                    bcast = bcast[:ndim_added]
                else:
                    bcast = bcast + ((False, ) * ndim_added)
                assert len(bcast) == ndim + ndim_added
                op = RandomFunction(
                    "uniform",
                    tensor.TensorType(dtype="float64", broadcastable=bcast),
                    ndim_added=ndim_added,
                )
                return op(random_state, size, low, high)

            return randomfunction

        uni_1 = ndim_added_deco(1)
        uni_0 = ndim_added_deco(0)
        uni_m1 = ndim_added_deco(-1)

        rng_R = random_state_type()

        p_uni11, uni11 = uni_1(rng_R, size=(4, ))
        p_uni12, uni12 = uni_1(rng_R, size=(3, 4))
        p_uni01, uni01 = uni_0(rng_R, size=(4, ))
        p_uni02, uni02 = uni_0(rng_R, size=(3, 4))
        p_unim11, unim11 = uni_m1(rng_R, size=(4, ))
        p_unim12, unim12 = uni_m1(rng_R, size=(3, 4))

        assert uni11.ndim == 2
        assert uni12.ndim == 3
        assert uni01.ndim == 1
        assert uni02.ndim == 2
        assert unim11.ndim == 0
        assert unim12.ndim == 1

        f11 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=p_uni11,
                    mutable=True,
                )
            ],
            [uni11],
            accept_inplace=True,
        )
        f12 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=p_uni12,
                    mutable=True,
                )
            ],
            [uni12],
            accept_inplace=True,
        )
        fm11 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=p_unim11,
                    mutable=True,
                )
            ],
            [unim11],
            accept_inplace=True,
        )
        fm12 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=p_unim12,
                    mutable=True,
                )
            ],
            [unim12],
            accept_inplace=True,
        )
        f0 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=p_uni02,
                    mutable=True,
                )
            ],
            [uni01, uni02],
            accept_inplace=True,
        )
        with pytest.raises(ValueError):
            f11()
        with pytest.raises(ValueError):
            f12()
        with pytest.raises(ValueError):
            fm11()
        with pytest.raises(ValueError):
            fm12()
        u01, u02 = f0()
        assert np.allclose(u01, u02[0])
Example #27
    def __init__(self,
                 num_hidden,
                 num_classes,
                 context_win_size,
                 embeddings,
                 featdim=0,
                 fine_tuning=False,
                 truncate_gradient=-1):
        """
        num_hidden :: dimension of the hidden layer
        num_classes :: number of classes
        context_win_size :: word window context size
        embeddings :: matrix
        """
        # hyper parameters of the model

        self.hyperparams = {}

        # nh :: dimension of the hidden layer
        nh = num_hidden
        self.hyperparams['nh'] = nh

        # nc :: number of classes
        nc = num_classes
        self.hyperparams['nc'] = nc

        # de :: dimension of the word embeddings
        de = embeddings.shape[1]
        self.hyperparams['de'] = de

        # cs :: word window context size
        cs = context_win_size
        self.hyperparams['cs'] = cs

        self.hyperparams['featdim'] = featdim
        self.hyperparams['fine_tuning'] = fine_tuning
        self.hyperparams['truncate_gradient'] = truncate_gradient

        # parameters

        self.Wx = theano.shared(
            0.2 *
            np.random.uniform(-1.0, 1.0,
                              (de * cs, nh)).astype(theano.config.floatX))

        self.Ws = theano.shared(
            0.2 * np.random.uniform(-1.0, 1.0,
                                    (nc, nh)).astype(theano.config.floatX))

        # V matrix
        self.W = theano.shared(
            0.2 *
            np.random.uniform(-1.0, 1.0,
                              (nh + featdim, nc)).astype(theano.config.floatX))

        self.bh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))

        self.b = theano.shared(np.zeros(nc, dtype=theano.config.floatX))

        self.s0 = theano.shared(np.zeros(nc, dtype=theano.config.floatX))

        self.emb = theano.shared(embeddings.astype(theano.config.floatX))

        # bundle
        self.params = [self.Wx, self.Ws, self.W, self.bh, self.b, self.s0]
        self.names = ['Wx', 'Ws', 'W', 'bh', 'b', 's0']

        if fine_tuning:
            self.params.append(self.emb)
            self.names.append("emb")

        idxs = T.imatrix()
        w = T.fscalar('w')
        x = self.emb[idxs].reshape((idxs.shape[0], de * cs)) * w
        y = T.iscalar('y')
        y_sentence = T.ivector('y_sentence')
        f = T.matrix('f')
        f.reshape((idxs.shape[0], featdim))
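        # NOTE: Variable.reshape returns a new variable rather than reshaping
        # f in place, so the line above has no effect as written; presumably
        # f = f.reshape(...) was intended.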

        def recurrence(x_t, feat_t, s_tm1):
            h_t = T.nnet.sigmoid(
                T.dot(x_t, self.Wx) + T.dot(s_tm1, self.Ws) + self.bh)

            if featdim > 0:
                all_t = T.concatenate([h_t, feat_t])
            else:
                all_t = h_t

            s_t = T.nnet.softmax(T.dot(all_t, self.W) + self.b)[0]
            return [h_t, s_t]

        [h, s], _ = theano.scan(fn=recurrence,
                                sequences=[x, f],
                                outputs_info=[None, self.s0],
                                n_steps=x.shape[0],
                                truncate_gradient=truncate_gradient)

        # probabilities
        p_y_given_x_sentence = s
        p_y_given_x_lastword = s[-1, :]

        # prediction
        y_pred = T.argmax(p_y_given_x_sentence, axis=1)

        # cost functions
        sentence_nll = -T.mean(
            T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y_sentence])

        nll = -T.mean(T.log(p_y_given_x_lastword)[y])

        # gradients
        sentence_gradients = T.grad(sentence_nll, self.params)
        gradients = T.grad(nll, self.params)

        # learning rate
        lr = T.scalar('lr')

        # updates
        sentence_updates = OrderedDict(
            (p, p - lr * g) for p, g in zip(self.params, sentence_gradients))

        updates = OrderedDict(
            (p, p - lr * g) for p, g in zip(self.params, gradients))

        # theano functions
        self.classify = theano.function(inputs=[idxs, f,
                                                In(w, value=1.0)],
                                        outputs=y_pred)

        self.sentence_train = theano.function(
            inputs=[idxs, f, y_sentence, lr,
                    In(w, value=1.0)],
            outputs=sentence_nll,
            updates=sentence_updates)

        self.train = theano.function(inputs=[idxs, f, y, lr,
                                             In(w, value=1.0)],
                                     outputs=nll,
                                     updates=updates)

        self.predict = theano.function(inputs=[idxs, f,
                                               In(w, value=1.0)],
                                       outputs=p_y_given_x_sentence)

        self.normalize = theano.function(
            inputs=[],
            updates={
                self.emb:
                self.emb / T.sqrt(
                    (self.emb**2).sum(axis=1)).dimshuffle(0, 'x')
            })
Example #28
    def test_permutation_helper(self):
        # Test that raw_random.permutation_helper generates the same
        # results as numpy,
        # and that the 'ndim_added' keyword behaves correctly.

        # permutation_helper needs "ndim_added=1", because its output
        # is one dimension more than its "shape" argument (and there's
        # no way to determine that automatically).
        # Check the working case, over two calls to see if the random
        # state is correctly updated.
        rf = RandomFunction(permutation_helper,
                            tensor.imatrix,
                            8,
                            ndim_added=1)
        rng_R = random_state_type()
        post_r, out = rf(rng_R, (7, ), 8)

        f = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=post_r,
                    mutable=True,
                )
            ],
            [out],
            accept_inplace=True,
        )

        numpy_rng = np.random.RandomState(utt.fetch_seed())
        val0 = f()
        val1 = f()
        # numpy_rng.permutation outputs one vector at a time,
        # so we call it iteratively to generate all the samples.
        numpy_val0 = np.asarray([numpy_rng.permutation(8) for i in range(7)])
        numpy_val1 = np.asarray([numpy_rng.permutation(8) for i in range(7)])
        assert np.all(val0 == numpy_val0)
        assert np.all(val1 == numpy_val1)

        # This call lacks "ndim_added=1", so ndim_added defaults to 0.
        # A ValueError should be raised.
        rf0 = RandomFunction(permutation_helper, tensor.imatrix, 8)
        post_r0, out0 = rf0(rng_R, (7, ), 8)
        f0 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=post_r0,
                    mutable=True,
                )
            ],
            [out0],
            accept_inplace=True,
        )
        with pytest.raises(ValueError):
            f0()

        # Here, ndim_added is 2 instead of 1. A ValueError should be raised.
        rf2 = RandomFunction(permutation_helper,
                             tensor.imatrix,
                             8,
                             ndim_added=2)
        post_r2, out2 = rf2(rng_R, (7, ), 8)
        f2 = function(
            [
                In(
                    rng_R,
                    value=np.random.RandomState(utt.fetch_seed()),
                    update=post_r2,
                    mutable=True,
                )
            ],
            [out2],
            accept_inplace=True,
        )
        with pytest.raises(ValueError):
            f2()
    def test_multiple_functions(self):
        a = T.scalar()  # the a is for 'anonymous' (un-named).
        x, s = T.scalars('xs')
        v = T.vector('v')

        # put in some inputs
        list_of_things = [s, x, v]

        # some derived thing, whose inputs aren't all in the list
        list_of_things.append(a * x + s)

        f1 = function([
            x,
            In(a, value=1.0, name='a'),
            In(s, value=0.0, update=s + a * x, mutable=True)
        ], s + a * x)
        list_of_things.append(f1)

        # now put in a function sharing container with the previous one
        f2 = function([
            x,
            In(a, value=1.0, name='a'),
            In(s, value=f1.container[s], update=s + a * x, mutable=True)
        ], s + a * x)
        list_of_things.append(f2)

        assert isinstance(f2.container[s].storage, list)
        assert f2.container[s].storage is f1.container[s].storage

        # now put in a function with non-scalar
        v_value = np.asarray([2, 3, 4.], dtype=config.floatX)
        f3 = function([x, In(v, value=v_value)], x + v)
        list_of_things.append(f3)

        # try to pickle the entire list of things
        try:
            saved_format = pickle.dumps(list_of_things, protocol=-1)
            new_list_of_things = pickle.loads(saved_format)
        except NotImplementedError as e:
            if e.args[0].startswith('DebugMode is not picklable'):
                return
            else:
                raise

        # now test our recovered new_list_of_things
        # it should be totally unrelated to the original
        # it should be interdependent in the same way as the original

        ol = list_of_things
        nl = new_list_of_things

        for i in range(4):
            assert nl[i] != ol[i]
            assert nl[i].type == ol[i].type
            assert nl[i].type is not ol[i].type

        # see if the implicit input got stored
        assert ol[3].owner.inputs[1] is s
        assert nl[3].owner.inputs[1] is not s
        assert nl[3].owner.inputs[1].type == s.type

        # moving on to the functions...
        for i in range(4, 7):
            assert nl[i] != ol[i]

        # looking at function number 1, input 's'
        assert nl[4][nl[0]] is not ol[4][ol[0]]
        assert nl[4][nl[0]] == ol[4][ol[0]]
        assert nl[4](3) == ol[4](3)

        # looking at function number 2, input 's'
        # make sure it's shared with the first function
        assert ol[4].container[ol[0]].storage is ol[5].container[ol[0]].storage
        assert nl[4].container[nl[0]].storage is nl[5].container[nl[0]].storage
        assert nl[5](3) == ol[5](3)
        assert nl[4].value[nl[0]] == 6

        assert np.all(nl[6][nl[2]] == np.asarray([2, 3., 4]))
Example #30
def fn():
    x, s = tt.scalars("xs")
    function([In(x, update=s + x)], x)