Example No. 1
def test_rmsprop_0():
    # input
    x = TT.vector(name='x')
    B = theano.shared(floatX(np.ones((3, 5))), name='B')
    c = theano.shared(floatX(np.ones(3)), name='c')
    params = [B, c]
    # output
    y_pred = TT.nnet.softmax(TT.dot(B, x.T).T + c)
    y_gold = TT.vector(name="y_gold")
    # cost and grads
    cost = TT.sum((y_pred - y_gold)**2)
    grads = TT.grad(cost, wrt=params)
    # funcs
    cost_func, update_func, rms_params = rmsprop(params, grads, [x], y_gold,
                                                 cost)
    # check return values
    assert len(rms_params) == 4
    assert isinstance(rms_params[0][0], TT.sharedvar.TensorSharedVariable)
    assert not np.any(rms_params[0][0].get_value())
    # check convergence
    X = [floatX(np.random.rand(5)) for _ in xrange(N)]
    Y = [floatX(np.random.rand(3)) for _ in xrange(N)]
    icost = init_cost = end_cost = 0.
    for i in xrange(MAX_I):
        icost = 0.
        for x, y in zip(X, Y):
            icost += cost_func(x, y)
            update_func()
        if i == 0:
            init_cost = icost
        elif i == MAX_I - 1:
            end_cost = icost
    assert end_cost < init_cost
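The rmsprop() helper exercised by this test is not shown on this page. The sketch below is one plausible implementation consistent with the assertions above (two compiled functions plus four lists of zero-initialized shared variables), modeled on the widely known Theano LSTM-tutorial variant; the project's actual hyper-parameter values and naming may differ.

import numpy as np
import theano
import theano.tensor as TT

def rmsprop(params, grads, inputs, y_gold, cost, lr=1e-4, rho=0.95, eps=1e-4):
    # per-parameter state: last gradients, running means of the gradients
    # and of their squares, and the current update direction
    zipped_grads = [theano.shared(np.zeros_like(p.get_value()))
                    for p in params]
    running_grads = [theano.shared(np.zeros_like(p.get_value()))
                     for p in params]
    running_grads2 = [theano.shared(np.zeros_like(p.get_value()))
                      for p in params]
    updir = [theano.shared(np.zeros_like(p.get_value())) for p in params]
    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, rho * rg + (1. - rho) * g)
            for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, rho * rg2 + (1. - rho) * g ** 2)
             for rg2, g in zip(running_grads2, grads)]
    # compute the cost and refresh the gradient statistics
    f_cost = theano.function(inputs + [y_gold], cost,
                             updates=zgup + rgup + rg2up, name="f_cost")
    updir_new = [(ud, 0.9 * ud - lr * zg / TT.sqrt(rg2 - rg ** 2 + eps))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                            running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(params, updir_new)]
    # apply the accumulated update direction to the parameters
    f_update = theano.function([], [], updates=updir_new + param_up,
                               name="f_update")
    return f_cost, f_update, [zipped_grads, running_grads,
                              running_grads2, updir]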
Example No. 2
    def _init_w2emb(self):
        """Compute a mapping from Word2Vec to embeddings.

        Note:
          modifies instance variables in place

        """
        # construct two matrices - one with the original word2vec
        # representations and another one with task-specific embeddings
        m = len(self.w2emb_i)
        n = self.ndim
        j = len(self._aux_keys)
        w2v_emb = floatX(np.empty((m, self.w2v.ndim)))
        task_emb = floatX(np.empty((m, n)))
        k = 0
        for w, i in self.w2emb_i.iteritems():
            k = i - j
            w2v_emb[k] = floatX(self.w2v[w])
            task_emb[k] = floatX(self.W_EMB[i])
        print("Computing task-specific transform matrix...", end="",
              file=sys.stderr)
        self.w2emb, res, rank, _ = np.linalg.lstsq(w2v_emb,
                                                   task_emb)
        self.w2emb = floatX(self.w2emb)
        print(" done (w2v rank: {:d}, residuals: {:f})".format(rank, sum(res)),
              file=sys.stderr)
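For reference, np.linalg.lstsq(A, B) solves for the matrix M minimizing ||A.dot(M) - B|| in the least-squares sense; a new word2vec vector v is then mapped into the task space as v.dot(M) (as done in _get_test_w2v_lstsq_emb_i further below). A tiny self-contained illustration:

import numpy as np

A = np.random.rand(6, 4)    # stand-in for w2v_emb (m x w2v.ndim)
B = np.random.rand(6, 3)    # stand-in for task_emb (m x ndim)
M, res, rank, _ = np.linalg.lstsq(A, B)
v = np.random.rand(4)       # a new word2vec vector
print(v.dot(M))             # its task-specific embedding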
Example No. 5
    def train(self, a_ts, a_dev_data=None):
        """Method for training the model.

        Args:
        a_ts (list(2-tuple(x, y))):
          list of training JSON data
        a_dev_data (2-tuple(dict, dict) or None):
          list of development JSON data

        Returns:
        (void)

        """
        # gold vector
        y_gold = TT.dvector(name="y_gold")
        # define cost and optimization function
        cost = TT.sum((self.y_pred - y_gold) ** 2)
        # predict = theano.function([self.x, y_gold], [self.y_pred, cost],
        #                           name="predict")
        gradients = TT.grad(cost, wrt=self._params)
        f_grad_shared, f_update, _ = rmsprop(self._params, gradients,
                                             [self.x], y_gold, cost)
        # perform actual training
        min_cost = INF
        best_params = []
        start_time = end_time = None
        time_delta = prev_icost = icost = 0.
        a_ts = [(floatX(x), floatX(y)) for x, y in a_ts]
        for i in xrange(MAX_ITERS):
            icost = 0.
            np.random.shuffle(a_ts)
            start_time = datetime.utcnow()
            for x_i, y_i in a_ts:
                try:
                    icost += f_grad_shared(x_i, y_i)
                    f_update()
                except Exception as e:
                    raise e
            if icost < min_cost:
                best_params = [p.get_value() for p in self._params]
                min_cost = icost
            end_time = datetime.utcnow()
            time_delta = (end_time - start_time).seconds
            print(
                "Iteration #{:d}: cost = {:f} ({:.2f} sec)".format(i,
                                                                   icost,
                                                                   time_delta),
                file=sys.stderr)
            if abs(prev_icost - icost) < CONV_EPS:
                break
            prev_icost = icost
        # set best values seen during training
        if best_params:
            for p, val in zip(self._params, best_params):
                p.set_value(val)
Example No. 7
    def _generate_ts(self, a_data, a_get_w_emb_i, a_get_c_emb_i):
        """Generate training set.

        Args:
          a_data (tuple):
            input data (discourse relations and parses)
          a_get_w_emb_i (method):
            custom method for retrieving the word embedding index
          a_get_c_emb_i (method):
            custom method for retrieving the conn embedding index

        Returns:
          tuple:
            lists of input features and expected classes

        """
        x, y = [], []
        if a_data is None:
            return (x, y)
        # generate features
        rels, parses = a_data
        # frequency of words in the corpus
        self._compute_w_stat(parses)
        for i, irel in rels:
            x.append((i, self._rel2x(irel, parses, a_get_w_emb_i,
                                     a_get_c_emb_i)))
            y.append(floatX(irel[SENSE]))
        return (x, y)
Example No. 9
    def test_svd_1(self):
        # compile function that takes preliminary input and outputs SVD
        get_svd = theano.function([self.svd.EMB_ARG1], self.svd.ARG1,
                                  name="get_svd")
        ret = get_svd(floatX(np.random.randn(20, 30)))
        import sys
        print(ret.size, ret.shape, file=sys.stderr)
        a = ret.dot(ret.T)
        assert np.allclose(a.diagonal(), np.ones(30))
        a -= np.eye(30)
        assert a.max() < 1.e-6
        assert a.min() > -1.e-5
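The assertions rely on the rows of the returned matrix being orthonormal, so that ret.dot(ret.T) is numerically the identity. The same property can be checked with plain numpy, assuming ARG1 corresponds to the full right singular vectors (V) of the 20 x 30 input:

import numpy as np

u, s, vt = np.linalg.svd(np.random.randn(20, 30), full_matrices=True)
v = vt.T                    # 30 x 30 orthogonal matrix
a = v.dot(v.T)
assert np.allclose(a.diagonal(), np.ones(30))
assert np.allclose(a, np.eye(30))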
Example No. 10
    def _init_w2v_emb(self):
        """Initialize word2vec embedding matrix.

        """
        w_emb = np.empty((self.w_i, self.ndim))
        w_emb[self.unk_w_i, :] = 1e-2  # prevent zeros in this row
        for w, i in self.w2emb_i.iteritems():
            if i == self.unk_w_i:
                continue
            w_emb[i] = self.w2v[w]
        self.W_EMB = theano.shared(value=floatX(w_emb), name="W_EMB")
        # We unload embeddings every time before the training to free more
        # memory.  Feel free to comment the line below, if you have plenty of
        # RAM.
        self.w2v.unload()
Example No. 11
    def __init__(self, a_w2v=False, a_lstsq=False, a_max_iters=MAX_ITERS):
        """Class constructor.

        Args:
          a_w2v (bool):
            use pre-trained word2vec instance
          a_lstsq (bool):
            pre-train task-specific word embeddings, but use the least-squares
            method to generate embeddings for unknown words from generic
            word2vec vectors
          a_max_iters (int):
            maximum number of iterations

        """
        # access to the original word2vec resource
        if a_lstsq:
            a_w2v = True
        if a_w2v:
            self.w2v = Word2Vec  # singleton object
        else:
            self.w2v = None
        self.lstsq = a_lstsq
        self._plain_w2v = self.w2v and not self.lstsq
        # matrix mapping word2vec to task-specific embeddings
        self.max_iters = a_max_iters
        self.w2emb = None
        self.ndim = -1  # vector dimensionality will be initialized later
        self.intm_dim = -1
        # mapping from word to its embedding index
        self.unk_w_i = 0
        self._aux_keys = set((0, ))
        self.w_i = 1
        self.w2emb_i = dict()
        # mapping from connective to its embedding index
        self.unk_c_i = 0
        self.c_i = 1
        self.c2emb_i = dict()
        # variables needed for training
        self._trained = False
        self._params = []
        self._w_stat = self._pred_class = None
        self.use_dropout = theano.shared(floatX(0.))
        self.W_EMB = self.CONN_EMB = self._cost = self._dev_cost = None
        # initialize theano functions to None
        self._reset_funcs()
        # set up functions for obtaining word embeddings at train and test
        # times
        self._init_wemb_funcs()
Example No. 13
    def _init_dropout(self, a_input):
        """Create a dropout layer.

        Args:
          a_input (theano.vector): input layer

        Returns:
          theano.vector: dropout layer

        """
        # the dropout layer itself
        output = TT.switch(self.use_dropout,
                           a_input * (TRNG.binomial(a_input.shape, p=0.5, n=1,
                                                    dtype=a_input.dtype)),
                           a_input * floatX(0.5))
        return output
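Note the two branches: during training (use_dropout = 1) units are zeroed at random with probability 0.5, while at test time activations are instead scaled by the keep probability 0.5 so that their expected magnitude matches the training regime. A numpy sketch of the same semantics:

import numpy as np

rng = np.random.RandomState(42)
x = np.ones(4, dtype="float32")
train_out = x * rng.binomial(n=1, p=0.5, size=x.shape)  # random binary mask
test_out = x * 0.5                                      # deterministic scaling
print(train_out)
print(test_out)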
Example No. 15
    def _init_X2Y(self):
        """Initialize tensor for mapping input mtx to output vec.

        Args:
        (void)

        Returns:
        (theano.shared):
          shared theano tensor

        """
        tens = np.zeros((self.n_y, self.n_x, self.n_y))
        tens -= EPS
        for i in xrange(self.n_y):
            tens[i, :, i] = 1.
        return theano.shared(value=floatX(tens), name="X2Y")
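In numpy terms, slice i of the resulting tensor is -EPS everywhere except column i, which is all ones. A tiny illustration (the value of EPS is not shown on this page; 1e-5 below is a placeholder):

import numpy as np

EPS = 1e-5                  # placeholder; the real constant is not shown here
n_y, n_x = 2, 3
tens = np.zeros((n_y, n_x, n_y)) - EPS
for i in range(n_y):
    tens[i, :, i] = 1.
print(tens[0])              # slice 0: ones in column 0, -EPS elsewhere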
Example No. 18
    def _get_test_w2v_lstsq_emb_i(self, a_word):
        """Obtain embedding index for the given word.

        Args:
          a_word (str):
            word whose embedding index should be retrieved

        Returns:
          np.array:
            embedding of the input word

        """
        a_word = _norm_word(a_word)
        emb_i = self.w2emb_i.get(a_word)
        if emb_i is None:
            if a_word in self.w2v:
                return floatX(np.dot(self.w2v[a_word], self.w2emb))
            return self.W_EMB[self.unk_w_i]
        return self.W_EMB[emb_i]
Example No. 21
def test_floatX_0():
    scalar = floatX(0)
    assert scalar.dtype == config.floatX
    assert isinstance(scalar, np.ndarray)
Example No. 22
def test_floatX_1():
    scalar = floatX(range(5))
    assert scalar.dtype == config.floatX
    assert isinstance(scalar, np.ndarray)
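These two tests pin down the contract of floatX itself, whose definition is not shown on this page: it must accept scalars and sequences alike and return an np.ndarray with theano's configured float type. A minimal sketch consistent with the assertions:

import numpy as np
from theano import config

def floatX(data):
    # cast arbitrary input (scalar, list, array) to an ndarray of the
    # dtype configured in theano (config.floatX)
    return np.asarray(data, dtype=config.floatX)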
Example No. 23
    def _init_lstm(self, a_invars, a_sfx="-forward"):
        """Initialize LSTM layer.

        Args:
          a_invars (list(theano.shared)):
              list of input parameters as symbolic theano variable
          a_sfx (str):
            suffix to use for function and parameter names

        Returns:
          (2-tuple):
            parameters to be optimized and list of symbolic outputs from the
            function

        """
        intm_dim = self.intm_dim
        # initialize transformation matrices and bias term
        W_dim = (intm_dim, self.ndim)
        W = np.concatenate([ORTHOGONAL(W_dim), ORTHOGONAL(W_dim),
                            ORTHOGONAL(W_dim), ORTHOGONAL(W_dim)], axis=0)
        W = theano.shared(value=W, name="W" + a_sfx)

        U_dim = (intm_dim, intm_dim)
        U = np.concatenate([ORTHOGONAL(U_dim), ORTHOGONAL(U_dim),
                            ORTHOGONAL(U_dim), ORTHOGONAL(U_dim)], axis=0)
        U = theano.shared(value=U, name="U" + a_sfx)

        V = ORTHOGONAL(U_dim)  # V for vendetta
        V = theano.shared(value=V, name="V" + a_sfx)

        b_dim = (1, intm_dim * 4)
        b = theano.shared(value=HE_UNIFORM(b_dim), name="b" + a_sfx)

        params = [W, U, V, b]

        # initialize dropout units
        w_do = theano.shared(value=floatX(np.ones((4 * intm_dim, ))),
                             name="w_do")
        w_do = self._init_dropout(w_do)
        u_do = theano.shared(value=floatX(np.ones((4 * intm_dim, ))),
                             name="u_do")
        u_do = self._init_dropout(u_do)

        # custom function for splitting up matrix parts
        def _slice(_x, n, dim):
            if _x.ndim == 3:
                return _x[:, :, n * dim:(n + 1) * dim]
            return _x[:, n * dim:(n + 1) * dim]

        # define recurrent LSTM unit
        def _step(x_, h_, c_, W, U, V, b, w_do, u_do):
            """Recurrent LSTM unit.

            Note:
            The general order of function parameters to fn is:
            sequences (if any), prior result(s) (if needed),
            non-sequences (if any)

            Args:
            x_ (theano.shared): input vector
            h_ (theano.shared): output vector
            c_ (theano.shared): memory state
            W (theano.shared): input transform matrix
            U (theano.shared): inner-state transform matrix
            V (theano.shared): output transform matrix
            b (theano.shared): bias vector
            w_do (TT.col): dropout unit for the W matrix
            u_do (TT.col): dropout unit for the U matrix

            Returns:
            (2-tuple(h, c))
            new hidden and memory states

            """
            # pre-compute common terms:
            # W \in R^{236 x 100}
            # x \in R^{1 x 100}
            # U \in R^{236 x 59}
            # h \in R^{1 x 59}
            # b \in R^{1 x 236}
            # w_do \in R^{236 x 1}
            # u_do \in R^{236 x 1}

            # xhb \in R^{1 x 236}
            xhb = (TT.dot(W * w_do.dimshuffle((0, 'x')), x_.T)
                   + TT.dot(U * u_do.dimshuffle((0, 'x')), h_.T)).T + b
            # i \in R^{1 x 59}
            i = TT.nnet.sigmoid(_slice(xhb, 0, intm_dim))
            # f \in R^{1 x 59}
            f = TT.nnet.sigmoid(_slice(xhb, 1, intm_dim))
            # c \in R^{1 x 59}
            c = TT.tanh(_slice(xhb, 2, intm_dim))
            c = i * c + f * c_
            # V \in R^{59 x 59}
            # o \in R^{1 x 59}
            o = TT.nnet.sigmoid(_slice(xhb, 3, intm_dim) + TT.dot(V, c.T).T)
            # h \in R^{1 x 59}
            h = o * TT.tanh(c)
            # return current output and memory state
            return h.flatten(), c.flatten()

        m = 0
        n = intm_dim
        ov = None
        outvars = []
        for iv, igbw in a_invars:
            m = iv.shape[0]
            ret, _ = theano.scan(_step,
                                 sequences=[iv],
                                 outputs_info=[
                                     floatX(np.zeros((n, ))),
                                     floatX(np.zeros((n, )))
                                 ],
                                 non_sequences=[W, U, V, b, w_do, u_do],
                                 name="LSTM" + str(iv) + a_sfx,
                                 n_steps=m,
                                 truncate_gradient=TRUNCATE_GRADIENT,
                                 go_backwards=igbw)
            ov = ret[0]
            outvars.append(ov)
        return params, outvars
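The _slice() helper assumes that the pre-activations of the four LSTM gates (input, forget, cell, output) are laid out side by side in a single 1 x (4 * intm_dim) row, which is why W, U, and b above are built with a factor of four. A numpy illustration of that layout:

import numpy as np

dim = 3
xhb = np.arange(4 * dim, dtype="float32").reshape(1, 4 * dim)
i, f, c, o = [xhb[:, k * dim:(k + 1) * dim] for k in range(4)]
print(i, f, c, o)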
Example No. 25
    def train(self,
              a_train_data,
              a_dev_data=None,
              a_n_y=-1,
              a_i=-1,
              a_train_out=None,
              a_dev_out=None):
        """Method for training the model.

        Args:
          a_train_data (2-tuple(list, dict)):
            list of training JSON data
          a_dev_data (2-tuple(list, dict) or None):
            list of development JSON data
          a_n_y (int):
            number of distinct classes
          a_i (int):
            row index for the output predictions
          a_train_out (np.array or None):
            predictions for the training set
          a_dev_out (np.array or None):
            predictions for the development set

        Returns:
          void:

        Note:
          updates ``a_train_out`` and ``a_dev_out`` in place

        """
        self.n_y = a_n_y
        # allocate data to development set if there is none
        if a_dev_data is None or not a_dev_data[0]:
            train_rels, parses = a_train_data
            docs = parses.keys()
            n_docs = len(docs)
            n_dev = max(n_docs / 10, 1)
            # sample without replacement
            dev_docs = set(np.random.choice(docs, n_dev, False))
            for ddname in dev_docs:
                print("dev_doc = '{:s}'".format(ddname).encode(ENCODING),
                      file=sys.stderr)
            new_train_rels, dev_rels = [], []
            for irel in train_rels:
                # relations are numbered at this place
                if irel[-1][DOC_ID] in dev_docs:
                    dev_rels.append(irel)
                else:
                    new_train_rels.append(irel)
            a_train_data = (new_train_rels, parses)
            a_dev_data = (dev_rels, parses)
        # convert training and development sets to features
        x_train, y_train = self._generate_ts(a_train_data,
                                             self.get_train_w_emb_i,
                                             self.get_train_c_emb_i)
        x_dev, y_dev = self._generate_ts(a_dev_data, self.get_test_w_emb_i,
                                         self.get_test_c_emb_i)
        # initialize the network
        self._init_nn()
        # activate dropout for training
        self.use_dropout.set_value(1.)

        # perform the training
        best_params = []
        dev_err = dev_cost = 0.
        prev_train_cost = train_cost = 0.
        min_dev_err = min_dev_cost = INF
        try:
            for i in xrange(self.max_iters):
                train_cost = 0.
                start_time = datetime.utcnow()
                # perform one training iteration
                for (_, (emb1, emb2, conn)), y in zip(x_train, y_train):
                    train_cost += self._grad_shared(emb1, emb2, conn, y)
                    self._update()
                # estimate the model on the dev set
                dev_err = dev_cost = 0.
                # temporarily deactivate dropout
                self.use_dropout.set_value(0.)
                for (_, (emb1, emb2, conn)), y in zip(x_dev, y_dev):
                    dev_err += (y[self._predict_class(emb1, emb2, conn)] == 0)
                    dev_cost += self._compute_dev_cost(emb1, emb2, conn, y)
                # switch dropout on again
                self.use_dropout.set_value(1.)
                end_time = datetime.utcnow()
                time_delta = (end_time - start_time).seconds
                if min_dev_err == INF or dev_err < min_dev_err or \
                   (dev_err == min_dev_err and
                        dev_cost < min_dev_cost):
                    best_params = [p.get_value() for p in self._params]
                    min_dev_err = dev_err
                    min_dev_cost = dev_cost
                print("Iteration {:d}:\ttrain_cost = {:f}\t"
                      "dev_err={:d}\tdev_cost={:f}\t({:.2f} sec)".format(
                          i, train_cost, int(dev_err), dev_cost, time_delta),
                      file=sys.stderr)
                if abs(prev_train_cost - train_cost) < CONV_EPS:
                    break
                prev_train_cost = train_cost
        except BaseException as e:
            print("ERROR: '{:s}'".format(e.message))
        # deactivate dropout
        self.use_dropout.set_value(0.)
        if best_params:
            for p, val in zip(self._params, best_params):
                p.set_value(val)
        else:
            raise RuntimeError("Network could not be trained.")
        # make predictions for the judge
        if a_i >= 0:
            # dropout remains deactivated for the predictions below
            if a_train_out is not None:
                for i, x_i in x_train:
                    self._predict(x_i, a_train_out[i], a_i)
            if a_dev_out:
                for i, x_i in x_dev:
                    self._predict(x_i, a_dev_out[i], a_i)
            else:
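                # dev relations were carved out of the training set above, so
                # their indices apparently refer to rows of a_train_out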
                for i, x_i in x_dev:
                    self._predict(x_i, a_train_out[i], a_i)
        # reset function members to allow cPickle to store this model
        self._reset_funcs()
        self._cleanup(self._rms_params)
        if self.w2v:
            self.W_EMB = floatX(self.W_EMB.get_value())
        if self.lstsq:
            self.w2v.load()
            self._init_w2emb()
        if self.w2v is not None:
            self.w2v.unload()
            self.w2v = None
        self._trained = True
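When no development data are supplied, the method above holds out roughly a tenth of the documents, sampled without replacement. The same split strategy in isolation:

import numpy as np

docs = ["doc1", "doc2", "doc3", "doc4", "doc5"]
n_dev = max(len(docs) // 10, 1)
dev_docs = set(np.random.choice(docs, n_dev, replace=False))
print(dev_docs)             # e.g. set(['doc2'])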