Example #1
    def __init__(self, name, x, y, lr, init_emb, vocab_size, emb_dim,
                 hidden_dim, output_dim, window, opt):

        assert window % 2 == 1, 'Window size must be odd'
        """ input """
        self.name = name
        self.x = x
        self.y = y
        self.lr = lr
        self.input = [self.x, self.y, self.lr]

        n_words = x.shape[0]
        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_size, emb_dim))

        self.W_in = theano.shared(
            sample_weights(hidden_dim, 1, window, emb_dim))
        self.W_out = theano.shared(sample_weights(hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(hidden_dim, 1))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [self.W_in, self.W_out, self.b_in, self.b_y]
        """ pad """
        self.zero = theano.shared(
            np.zeros(shape=(1, 1, window // 2, emb_dim),
                     dtype=theano.config.floatX))
        """ look up embedding """
        self.x_emb = self.emb[self.x]  # x_emb: 1D: n_words, 2D: n_emb
        """ convolution """
        self.x_in = self.conv(self.x_emb)
        """ feed-forward computation """
        self.h = relu(
            self.x_in.reshape((self.x_in.shape[1], self.x_in.shape[2])) +
            T.repeat(self.b_in, T.cast(self.x_in.shape[2], 'int32'), 1)).T
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)
        """ prediction """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)
        """ cost function """
        self.nll = -T.sum(T.log(self.p_y_given_x)[T.arange(n_words), self.y])
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb,
                               self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb,
                                    self.x_emb, self.x, self.lr)
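Every example on this page initializes its parameters through a sample_weights helper that is not shown. A minimal sketch of such a helper, assuming a Glorot-style uniform initializer scaled by the layer dimensions (the exact ranges in the original codebase may differ):

import numpy as np
import theano

def sample_weights(*dims):
    # Uniform init in [-sqrt(6 / sum(dims)), +sqrt(6 / sum(dims))],
    # cast to floatX so theano.shared() stores it with the right dtype.
    bound = np.sqrt(6.0 / sum(dims))
    return np.asarray(np.random.uniform(low=-bound, high=bound, size=dims),
                      dtype=theano.config.floatX)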
Example #2
    def __init__(self, name, x, y, lr, init_emb, vocab_size, emb_dim, hidden_dim, output_dim, window, opt):

        assert window % 2 == 1, 'Window size must be odd'

        """ input """
        self.name = name
        self.x = x
        self.y = y
        self.lr = lr
        self.input = [self.x, self.y, self.lr]

        n_words = x.shape[0]

        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_size, emb_dim))

        self.W_in = theano.shared(sample_weights(hidden_dim, 1, window, emb_dim))
        self.W_out = theano.shared(sample_weights(hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(hidden_dim, 1))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [self.W_in, self.W_out, self.b_in, self.b_y]

        """ pad """
        self.zero = theano.shared(np.zeros(shape=(1, 1, window // 2, emb_dim), dtype=theano.config.floatX))

        """ look up embedding """
        self.x_emb = self.emb[self.x]  # x_emb: 1D: n_words, 2D: n_emb

        """ convolution """
        self.x_in = self.conv(self.x_emb)

        """ feed-forward computation """
        self.h = relu(self.x_in.reshape((self.x_in.shape[1], self.x_in.shape[2])) + T.repeat(self.b_in, T.cast(self.x_in.shape[2], 'int32'), 1)).T
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)

        """ prediction """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)

        """ cost function """
        self.nll = -T.sum(T.log(self.p_y_given_x)[T.arange(n_words), self.y])
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb, self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb, self.x_emb, self.x, self.lr)
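Examples #1 and #2 delegate their parameter updates to sgd and ada_grad helpers that the snippets do not include. A rough sketch of the sgd variant, assuming the standard Theano pattern of updating only the embedding rows used in the current batch through T.inc_subtensor (the real helpers may differ, e.g. in how ada_grad tracks its accumulators):

import theano.tensor as T

def sgd(cost, params, emb, x_emb, lr):
    # Dense updates for the ordinary parameters.
    updates = [(p, p - lr * T.grad(cost, p)) for p in params]
    # x_emb is a subtensor of emb (emb[x]), so inc_subtensor yields a
    # sparse update touching only the rows looked up in this batch.
    updates.append((emb, T.inc_subtensor(x_emb, -lr * T.grad(cost, x_emb))))
    return updates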
Example #3
    def __init__(self, n_h, pooling, k=2):
        self.k = k
#        self.W_m = theano.shared(sample_weights(n_h, n_h))
        self.W_m1 = theano.shared(sample_weights(n_h, n_h))
        self.W_m2 = theano.shared(sample_weights(n_h, n_h))
        self.W_m3 = theano.shared(sample_weights(n_h, n_h))
#        self.W_c = theano.shared(sample_weights(n_h * (k+1), n_h))
        self.W_c = theano.shared(sample_weights(n_h * 2, n_h))
#        self.W_k = theano.shared(sample_weights(n_h, n_h))
        self.pooling = pooling

#        self.params = [self.W_m, self.W_c]
        self.params = [self.W_m1, self.W_m2, self.W_m3, self.W_c]
Example #4
    def __init__(self, x, y, n_words, batch_size, lr, init_emb, vocab_size,
                 emb_dim, hidden_dim, output_dim, window, opt):
        assert window % 2 == 1, 'Window size must be odd'
        """ input """
        self.x = x  # 1D: n_words * batch_size, 2D: window; elem=word id
        self.x_v = x.flatten()  # 1D: n_words * batch_size * window; elem=word id
        self.y = y
        self.batch_size = batch_size
        self.n_words = n_words
        self.lr = lr
        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_size, emb_dim))

        self.W_in = theano.shared(sample_weights(emb_dim * window, hidden_dim))
        self.W_out = theano.shared(sample_weights(hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [self.W_in, self.W_out, self.b_in, self.b_y]
        """ look up embedding """
        self.x_emb = self.emb[self.x_v]  # x_emb: 1D: batch_size * n_words * window, 2D: emb_dim
        """ forward """
        self.h = relu(T.dot(self.x_emb.reshape((batch_size * n_words, emb_dim * window)), self.W_in) + self.b_in)
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)
        """ predict """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)
        """ loss """
        self.log_p = T.log(self.p_y_given_x)[T.arange(batch_size * n_words),
                                             self.y]
        self.nll = -T.sum(self.log_p)
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb,
                               self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb,
                                    self.x_emb, self.x, self.lr)
Example #5
    def __init__(self, x, y, n_words, batch_size, lr, init_emb, vocab_size, emb_dim, hidden_dim, output_dim, window, opt):
        assert window % 2 == 1, 'Window size must be odd'

        """ input """
        self.x = x  # 1D: n_words * batch_size, 2D: window; elem=word id
        self.x_v = x.flatten()  # 1D: n_words * batch_size * window; elem=word id
        self.y = y
        self.batch_size = batch_size
        self.n_words = n_words
        self.lr = lr

        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_size, emb_dim))

        self.W_in = theano.shared(sample_weights(emb_dim * window, hidden_dim))
        self.W_out = theano.shared(sample_weights(hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [self.W_in, self.W_out, self.b_in, self.b_y]

        """ look up embedding """
        self.x_emb = self.emb[self.x_v]  # x_emb: 1D: batch_size * n_words * window, 2D: emb_dim

        """ forward """
        self.h = relu(T.dot(self.x_emb.reshape((batch_size * n_words, emb_dim * window)), self.W_in) + self.b_in)
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)

        """ predict """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)

        """ loss """
        self.log_p = T.log(self.p_y_given_x)[T.arange(batch_size * n_words), self.y]
        self.nll = -T.sum(self.log_p)
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb, self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb, self.x_emb, self.x, self.lr)
Example #6
def layers(x, window, dim_emb, dim_hidden, n_layers, activation=tanh):
    params = []
    zero = T.zeros((1, dim_emb * window), dtype=theano.config.floatX)

    def zero_pad_gate(matrix):
        return T.neq(T.sum(T.eq(matrix, zero), 1, keepdims=True), dim_emb * window)

    for i in xrange(n_layers):
        if i == 0:
            W = theano.shared(sample_weights(dim_emb * window, dim_hidden))
#            h = zero_pad_gate(x) * relu(T.dot(x, W))
            h = relu(T.dot(x, W))
        else:
            W = theano.shared(sample_weights(dim_hidden, dim_hidden))
            h = activation(T.dot(h, W))
        params.append(W)

    return h, params
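For context, layers just stacks n_layers dense transformations over a window of concatenated embeddings. A hypothetical call site (the names below are illustrative, not from the original repository):

x = T.matrix('x')  # 1D: n_words, 2D: dim_emb * window
h, params = layers(x, window=5, dim_emb=50, dim_hidden=300, n_layers=2)
forward = theano.function(inputs=[x], outputs=h)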
Example #7
    def __init__(self, n_i=32, n_h=32, activation=tanh):
        self.activation = activation

        self.W = theano.shared(sample_weights(n_i, n_h))

        self.W_xr = theano.shared(sample_weights(n_h, n_h))
        self.W_hr = theano.shared(sample_weights(n_h, n_h))

        self.W_xz = theano.shared(sample_weights(n_h, n_h))
        self.W_hz = theano.shared(sample_weights(n_h, n_h))

        self.W_xh = theano.shared(sample_weights(n_h, n_h))
        self.W_hh = theano.shared(sample_weights(n_h, n_h))

        self.params = [self.W, self.W_xr, self.W_hr, self.W_xz, self.W_hz, self.W_xh, self.W_hh]
Example #9
    def __init__(self, n_i, n_labels):
        self.W = theano.shared(sample_weights(n_i, n_labels))
        self.W_trans = theano.shared(sample_weights(n_labels, n_labels))
        self.params = [self.W, self.W_trans]
Example #10
    def __init__(self, name, w, c, b, y, lr,
                 init_w_emb, vocab_w_size, vocab_c_size,
                 w_emb_dim, c_emb_dim, w_hidden_dim, c_hidden_dim, output_dim,
                 window, opt):

        assert window % 2 == 1, 'Window size must be odd'

        """ input """
        self.name = name
        self.w = w
        self.c = c
        self.b = b
        self.y = y
        self.lr = lr
        self.input = [self.w, self.c, self.b, self.y, self.lr]

        n_phi = w_emb_dim + c_emb_dim * window
        n_words = w.shape[0]

        """ params """
        if init_w_emb is not None:
            self.emb = theano.shared(init_w_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_w_size, w_emb_dim))

        self.emb_c = theano.shared(sample_norm_dist(vocab_c_size, c_emb_dim))
        self.W_in = theano.shared(sample_weights(w_hidden_dim, 1, window, n_phi))
        self.W_c = theano.shared(sample_weights(c_hidden_dim, 1, window, c_emb_dim))
        self.W_out = theano.shared(sample_weights(w_hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(w_hidden_dim, 1))
        self.b_c = theano.shared(sample_weights(c_hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        """ pad """
        self.zero = theano.shared(np.zeros(shape=(1, 1, window // 2, n_phi), dtype=theano.config.floatX))
        self.zero_c = theano.shared(np.zeros(shape=(1, 1, window // 2, c_emb_dim), dtype=theano.config.floatX))

        self.params = [self.emb_c, self.W_in, self.W_c, self.W_out, self.b_in, self.b_c, self.b_y]

        """ look up embedding """
        x_emb = self.emb[self.w]  # x_emb: 1D: n_words, 2D: w_emb_dim
        c_emb = self.emb_c[self.c]  # c_emb: 1D: n_chars, 2D: c_emb_dim

        """ create feature """
        c_phi = self.create_char_feature(self.b, c_emb, self.zero_c) + self.b_c  # 1D: n_words, 2D: c_hidden_dim(50)
        x_phi = T.concatenate([x_emb, c_phi], axis=1)  # 1D: n_words, 2D: w_emb_dim(100) + c_hidden_dim(50)

        """ convolution """
        x_padded = T.concatenate([self.zero, x_phi.reshape((1, 1, x_phi.shape[0], x_phi.shape[1])), self.zero], axis=2)  # x_padded: 1D: n_words + n_pad, 2D: n_phi
        x_in = conv2d(input=x_padded, filters=self.W_in)  # 1D: 1, 2D: w_hidden_dim(300), 3D: n_words, 4D: 1

        """ feed-forward computation """
        h = relu(x_in.reshape((x_in.shape[1], x_in.shape[2])) + T.repeat(self.b_in, T.cast(x_in.shape[2], 'int32'), 1)).T
        self.o = T.dot(h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)

        """ prediction """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)

        """ cost function """
        self.nll = -T.sum(T.log(self.p_y_given_x)[T.arange(n_words), self.y])
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, x_emb, self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb, x_emb, self.w, self.lr)
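Example #10 also draws its character embeddings from a sample_norm_dist helper, presumably a normal-distribution counterpart of sample_weights. A plausible sketch, offered as an assumption only (the scale factor is a guess):

def sample_norm_dist(*dims):
    # Normal-distributed initializer for embeddings; the 0.01 std is assumed.
    return np.asarray(np.random.randn(*dims) * 0.01,
                      dtype=theano.config.floatX)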
Example #11
    def __init__(self, n_i=32, n_h=32, activation=tanh):
        self.activation = activation
        self.W = theano.shared(sample_weights(n_i, n_h))
        self.params = [self.W]
Example #12
    def __init__(self, x, c, y, n_words, batch_size, lr, init_emb,
                 vocab_w_size, w_emb_dim, w_hidden_dim, c_emb_dim,
                 c_hidden_dim, output_dim, vocab_c_size, window, opt):
        assert window % 2 == 1, 'Window size must be odd'
        """ input """
        self.x = x  # 1D: n_words * batch_size, 2D: window; elem=word id
        self.x_v = x.flatten()  # 1D: n_words * batch_size * window; elem=word id
        self.c = c  # 1D: n_words * batch_size, 2D: window, 3D: max_len_char, 4D: window; elem=char id
        self.y = y
        self.batch_size = batch_size
        self.n_words = n_words
        self.lr = lr

        n_phi = (w_emb_dim + c_hidden_dim) * window
        max_len_char = T.cast(self.c.shape[2], 'int32')
        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_w_size, w_emb_dim))

        self.pad = build_shared_zeros((1, c_emb_dim))
        self.e_c = theano.shared(sample_norm_dist(vocab_c_size - 1, c_emb_dim))
        self.emb_c = T.concatenate([self.pad, self.e_c], 0)

        self.W_in = theano.shared(sample_weights(n_phi, w_hidden_dim))
        self.W_c = theano.shared(
            sample_weights(c_emb_dim * window, c_hidden_dim))
        self.W_out = theano.shared(sample_weights(w_hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(w_hidden_dim))
        self.b_c = theano.shared(sample_weights(c_hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [
            self.e_c, self.W_in, self.W_c, self.W_out, self.b_in, self.b_c,
            self.b_y
        ]
        """ look up embedding """
        self.x_emb = self.emb[self.x_v]  # 1D: batch_size * n_words * window, 2D: emb_dim
        self.c_emb = self.emb_c[self.c]  # 1D: batch_size * n_words, 2D: window, 3D: max_len_char, 4D: window, 5D: n_c_emb
        self.x_emb_r = self.x_emb.reshape((x.shape[0], x.shape[1], -1))
        """ convolution """
        self.c_phi = T.max(T.dot(self.c_emb.reshape((batch_size * n_words, window, max_len_char, -1)), self.W_c) + self.b_c, 2)  # 1D: n_words, 2D: window, 3D: n_h_c
        self.x_phi = T.concatenate([self.x_emb_r, self.c_phi], axis=2)
        """ forward """
        self.h = relu(T.dot(self.x_phi.reshape((batch_size * n_words, n_phi)), self.W_in) + self.b_in)
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)
        """ predict """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)
        """ loss """
        self.log_p = T.log(self.p_y_given_x)[T.arange(batch_size * n_words),
                                             self.y]
        self.nll = -T.sum(self.log_p)
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb,
                               self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb,
                                    self.x_emb, self.x, self.lr)
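Example #12 additionally relies on build_shared_zeros for the character padding row. A one-line sketch, assuming it mirrors the theano.shared(np.zeros(...)) padding pattern used elsewhere on this page:

def build_shared_zeros(shape):
    # Zero-filled shared variable, e.g. the padding embedding row.
    return theano.shared(np.zeros(shape, dtype=theano.config.floatX))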
Example #13
    def __init__(self, x_span, x_word, x_ctx, x_dist, x_slen, y, init_emb, n_vocab, dim_w_p, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; 2D: 2; elem=[sent dist, ment dist]
        :param x_slen: 1D: batch; 2D: 3; elem=[m_span_len, a_span_len, head_match]
        :param y     : 1D: batch
        """

        self.input = [x_span, x_word, x_ctx, x_dist, x_slen, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx = x_ctx
        self.x_dist = x_dist
        self.x_slen = x_slen
        self.y = y

        """ Dimensions """
        dim_w_a = dim_w_p // 5
        dim_x_a = dim_w_a * (5 + 2 + 2 + 1)
        dim_x_p = dim_w_p * (10 + 4 + 4 + 2 + 3) + dim_x_a
        batch = y.shape[0]

        """ Hyper Parameters for Cost Function """
        self.a1 = 0.5
        self.a2 = 1.2
        self.a3 = 1.0

        """ Params """
        if init_emb is None:
            self.W_a_w = theano.shared(sample_weights(n_vocab, dim_w_a))
            self.W_p_w = theano.shared(sample_weights(n_vocab, dim_w_p))
        else:
            self.W_a_w = theano.shared(init_emb)
            self.W_p_w = theano.shared(init_emb)

        self.W_a_l = theano.shared(sample_weights(5, dim_w_a))
        self.W_a_o = theano.shared(sample_weights(dim_x_a, 1))

        self.W_p_d = theano.shared(sample_weights(dim_d, dim_w_p))
        self.W_p_l = theano.shared(sample_weights(7, dim_w_p))
        self.W_p_h = theano.shared(sample_weights(dim_x_p, dim_h))
        self.W_p_o = theano.shared(sample_weights(dim_h))

        self.params = [self.W_p_d, self.W_p_l, self.W_a_l, self.W_p_h, self.W_p_o, self.W_a_o]

        """ Anaphoric Layer """
        x_vec_a = T.concatenate(
            [x_span[0][: x_span.shape[1] // 2], x_word[0][: x_word.shape[1] // 2], x_ctx[0][: x_ctx.shape[1] // 2]]
        )

        x_a_w = self.W_a_w[x_vec_a]  # 1D: batch, 2D: (limit * 1 + 2 + ctx), 3D: dim_w_a
        x_a_l = self.W_a_l[x_slen[0][0]]  # 1D: dim_w_a
        h_a = T.concatenate([x_a_w.flatten(), x_a_l])

        """ Pair Layer """
        x_p_w_in = T.concatenate([x_span, x_word, x_ctx], 1).flatten()  # 1D: batch * (limit * 2 + 4 + 20)
        x_p_w = self.W_p_w[x_p_w_in]  # 1D: batch, 2D: (limit * 2 + 4 + ctx * 2), 3D: dim_w
        x_p_l = self.W_p_l[x_slen]  # 1D: batch, 2D: 3, 3D: dim_w
        x_p_d = self.W_p_d[x_dist]  # 1D: batch, 2D: 2, 3D: dim_w
        h_p = T.concatenate([x_p_w.reshape((batch, -1)), x_p_d.reshape((batch, -1)), x_p_l.reshape((batch, -1))], 1)
        g_p = tanh(T.dot(T.concatenate([h_p, T.repeat(h_a.dimshuffle("x", 0), batch, 0)], 1), self.W_p_h))

        """ Output Layer """
        p_y_a = T.dot(h_a, self.W_a_o)  # p_y_a: 1D: 1; elem=scalar
        p_y_p = T.dot(g_p, self.W_p_o)  # p_y_p: 1D: batch
        p_y = T.concatenate([p_y_a, p_y_p])

        """ Label Set """
        y_0 = T.switch(T.sum(y), 0, 1)  # y_0: 1 if the mention is non-anaph else 0
        y_all = T.concatenate([y_0.dimshuffle("x"), y])

        """ Predicts """
        self.y_hat = T.argmax(p_y)
        self.p_y_hat = p_y[T.argmax(p_y - T.min(p_y) * y_all)]

        """ Cost Function """
        self.nll = T.max(self.miss_cost(T.arange(y_all.shape[0]), y_all) * (1 + p_y - self.p_y_hat))
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

        """ Optimization """
        self.updates = sgd_w(self.cost, self.params, self.W_p_w, x_p_w, self.W_a_w, x_a_w)

        """ Check Results """
        self.total_p = T.switch(self.y_hat, 1, 0)
        self.total_r = 1 - y_0
        self.correct = y_all[self.y_hat]
        self.correct_t = T.switch(self.correct, T.switch(y_0, 0, 1), 0)
        self.correct_f = T.switch(self.correct, T.switch(y_0, 1, 0), 0)
Example #14
    def __init__(self, x_span, x_word, x_ctx, x_dist, x_slen, y, init_emb, n_vocab, dim_w_p, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; 2D: 2; elem=[sent dist, ment dist]
        :param x_slen: 1D: batch; 2D: 3; elem=[m_span_len, a_span_len, head_match]
        :param y     : 1D: batch
        """

        self.input  = [x_span, x_word, x_ctx, x_dist, x_slen, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx  = x_ctx
        self.x_dist = x_dist
        self.x_slen = x_slen
        self.y      = y

        """ Dimensions """
        dim_w_a = dim_w_p // 5
        dim_x_a = dim_w_a * (5 + 2 + 2 + 1)
        dim_x_p = dim_w_p * (10 + 4 + 4 + 2 + 3) + dim_x_a
        batch = y.shape[0]

        """ Hyper Parameters for Cost Function """
        self.a1 = 0.5
        self.a2 = 1.2
        self.a3 = 1.

        """ Params """
        if init_emb is None:
            self.W_a_w = theano.shared(sample_weights(n_vocab, dim_w_a))
            self.W_p_w = theano.shared(sample_weights(n_vocab, dim_w_p))
        else:
            self.W_a_w = theano.shared(init_emb)
            self.W_p_w = theano.shared(init_emb)

        self.W_a_l = theano.shared(sample_weights(5, dim_w_a))
        self.W_a_o = theano.shared(sample_weights(dim_x_a, 1))

        self.W_p_d = theano.shared(sample_weights(dim_d, dim_w_p))
        self.W_p_l = theano.shared(sample_weights(7, dim_w_p))
        self.W_p_h = theano.shared(sample_weights(dim_x_p, dim_h))
        self.W_p_o = theano.shared(sample_weights(dim_h))

        self.params = [self.W_p_d, self.W_p_l, self.W_a_l, self.W_p_h, self.W_p_o, self.W_a_o]

        """ Anaphoric Layer """
        x_vec_a = T.concatenate([x_span[0][:x_span.shape[1]//2],
                                 x_word[0][:x_word.shape[1]//2],
                                 x_ctx[0][:x_ctx.shape[1]//2]])

        x_a_w = self.W_a_w[x_vec_a]       # 1D: batch, 2D: (limit * 1 + 2 + ctx), 3D: dim_w_a
        x_a_l = self.W_a_l[x_slen[0][0]]  # 1D: dim_w_a
        h_a = T.concatenate([x_a_w.flatten(), x_a_l])

        """ Pair Layer """
        x_p_w_in = T.concatenate([x_span, x_word, x_ctx], 1).flatten()  # 1D: batch * (limit * 2 + 4 + 20)
        x_p_w = self.W_p_w[x_p_w_in]  # 1D: batch, 2D: (limit * 2 + 4 + ctx * 2), 3D: dim_w
        x_p_l = self.W_p_l[x_slen]    # 1D: batch, 2D: 3, 3D: dim_w
        x_p_d = self.W_p_d[x_dist]    # 1D: batch, 2D: 2, 3D: dim_w
        h_p = T.concatenate([x_p_w.reshape((batch, -1)), x_p_d.reshape((batch, -1)), x_p_l.reshape((batch, -1))], 1)
        g_p = tanh(T.dot(T.concatenate([h_p, T.repeat(h_a.dimshuffle('x', 0), batch, 0)], 1), self.W_p_h))

        """ Output Layer """
        p_y_a = T.dot(h_a, self.W_a_o)  # p_y_a: 1D: 1; elem=scalar
        p_y_p = T.dot(g_p, self.W_p_o)  # p_y_p: 1D: batch
        p_y = T.concatenate([p_y_a, p_y_p])

        """ Label Set """
        y_0 = T.switch(T.sum(y), 0, 1)  # y_0: 1 if the mention is non-anaph else 0
        y_all = T.concatenate([y_0.dimshuffle('x'), y])

        """ Predicts """
        self.y_hat = T.argmax(p_y)
        self.p_y_hat = p_y[T.argmax(p_y - T.min(p_y) * y_all)]

        """ Cost Function """
        self.nll = T.max(self.miss_cost(T.arange(y_all.shape[0]), y_all) * (1 + p_y - self.p_y_hat))
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

        """ Optimization """
        self.updates = sgd_w(self.cost, self.params, self.W_p_w, x_p_w, self.W_a_w, x_a_w)

        """ Check Results """
        self.total_p = T.switch(self.y_hat, 1, 0)
        self.total_r = 1 - y_0
        self.correct = y_all[self.y_hat]
        self.correct_t = T.switch(self.correct, T.switch(y_0, 0, 1), 0)
        self.correct_f = T.switch(self.correct, T.switch(y_0, 1, 0), 0)
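The cost in Examples #13 and #14 is a slack-rescaled max-margin objective in the style of mention-ranking coreference models; miss_cost itself is not shown, but the otherwise unused a1/a2/a3 fields suggest per-error-type penalties. The following is a speculative sketch only, with an assumed mapping of penalties to error types:

def miss_cost(self, idx, y_all):
    # Zero penalty on correct candidates; otherwise (assumed mapping):
    #   a2: mention is anaphoric but the "new" candidate (idx == 0) is chosen
    #   a1: mention is non-anaphoric but an antecedent is chosen
    #   a3: anaphoric, an antecedent is chosen, but the wrong one
    false_new = T.eq(idx, 0) * self.a2
    false_anaphoric = y_all[0] * self.a1
    wrong_link = (1 - T.eq(idx, 0)) * (1 - y_all[0]) * self.a3
    return (1 - y_all[idx]) * (false_new + false_anaphoric + wrong_link)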
Example #15
    def __init__(self, n_in, n_h, activation=tanh):
        self.activation = activation

        self.W_xi = theano.shared(sample_weights(n_in, n_h))
        self.W_hi = theano.shared(sample_weights(n_h, n_h))
        self.W_ci = theano.shared(sample_weights(n_h))

        self.W_xf = theano.shared(sample_weights(n_in, n_h))
        self.W_hf = theano.shared(sample_weights(n_h, n_h))
        self.W_cf = theano.shared(sample_weights(n_h))

        self.W_xc = theano.shared(sample_weights(n_in, n_h))
        self.W_hc = theano.shared(sample_weights(n_h, n_h))

        self.W_xo = theano.shared(sample_weights(n_in, n_h))
        self.W_ho = theano.shared(sample_weights(n_h, n_h))
        self.W_co = theano.shared(sample_weights(n_h))

        self.params = [self.W_xi, self.W_hi, self.W_ci, self.W_xf, self.W_hf, self.W_cf,
                       self.W_xc, self.W_hc, self.W_xo, self.W_ho, self.W_co]
Example #16
    def __init__(self, x, c, y, n_words, batch_size, lr, init_emb, vocab_w_size, w_emb_dim, w_hidden_dim,
                 c_emb_dim, c_hidden_dim, output_dim, vocab_c_size, window, opt):
        assert window % 2 == 1, 'Window size must be odd'

        """ input """
        self.x = x  # 1D: n_words * batch_size, 2D: window; elem=word id
        self.x_v = x.flatten()  # 1D: n_words * batch_size * window; elem=word id
        self.c = c  # 1D: n_words * batch_size, 2D: window, 3D: max_len_char, 4D: window; elem=char id
        self.y = y
        self.batch_size = batch_size
        self.n_words = n_words
        self.lr = lr

        n_phi = (w_emb_dim + c_hidden_dim) * window
        max_len_char = T.cast(self.c.shape[2], 'int32')

        """ params """
        if init_emb is not None:
            self.emb = theano.shared(init_emb)
        else:
            self.emb = theano.shared(sample_weights(vocab_w_size, w_emb_dim))

        self.pad = build_shared_zeros((1, c_emb_dim))
        self.e_c = theano.shared(sample_norm_dist(vocab_c_size - 1, c_emb_dim))
        self.emb_c = T.concatenate([self.pad, self.e_c], 0)

        self.W_in = theano.shared(sample_weights(n_phi, w_hidden_dim))
        self.W_c = theano.shared(sample_weights(c_emb_dim * window, c_hidden_dim))
        self.W_out = theano.shared(sample_weights(w_hidden_dim, output_dim))

        self.b_in = theano.shared(sample_weights(w_hidden_dim))
        self.b_c = theano.shared(sample_weights(c_hidden_dim))
        self.b_y = theano.shared(sample_weights(output_dim))

        self.params = [self.e_c, self.W_in, self.W_c, self.W_out, self.b_in, self.b_c, self.b_y]

        """ look up embedding """
        self.x_emb = self.emb[self.x_v]  # 1D: batch_size*n_words * window, 2D: emb_dim
        self.c_emb = self.emb_c[self.c]  # 1D: batch_size*n_words, 2D: window, 3D: max_len_char, 4D: window, 5D: n_c_emb
        self.x_emb_r = self.x_emb.reshape((x.shape[0], x.shape[1], -1))

        """ convolution """
        self.c_phi = T.max(T.dot(self.c_emb.reshape((batch_size * n_words, window, max_len_char, -1)), self.W_c) + self.b_c, 2)  # 1D: n_words, 2D: window, 3D: n_h_c
        self.x_phi = T.concatenate([self.x_emb_r, self.c_phi], axis=2)

        """ forward """
        self.h = relu(T.dot(self.x_phi.reshape((batch_size * n_words, n_phi)), self.W_in) + self.b_in)
        self.o = T.dot(self.h, self.W_out) + self.b_y
        self.p_y_given_x = T.nnet.softmax(self.o)

        """ predict """
        self.y_pred = T.argmax(self.o, axis=1)
        self.result = T.eq(self.y_pred, self.y)

        """ loss """
        self.log_p = T.log(self.p_y_given_x)[T.arange(batch_size * n_words), self.y]
        self.nll = -T.sum(self.log_p)
        self.cost = self.nll

        if opt == 'sgd':
            self.updates = sgd(self.cost, self.params, self.emb, self.x_emb, self.lr)
        else:
            self.updates = ada_grad(self.cost, self.params, self.emb, self.x_emb, self.x, self.lr)
Example #17
    def __init__(self, n_i, n_h, activation=tanh):
        self.activation = activation
        self.W = theano.shared(sample_weights(n_i, n_h))

        """input gate parameters"""
        self.W_xi = theano.shared(sample_weights(n_h, n_h))
        self.W_hi = theano.shared(sample_weights(n_h, n_h))
        self.W_ci = theano.shared(sample_weights(n_h))

        """forget gate parameters"""
        self.W_xf = theano.shared(sample_weights(n_h, n_h))
        self.W_hf = theano.shared(sample_weights(n_h, n_h))
        self.W_cf = theano.shared(sample_weights(n_h))

        """cell parameters"""
        self.W_xc = theano.shared(sample_weights(n_h, n_h))
        self.W_hc = theano.shared(sample_weights(n_h, n_h))

        """output gate parameters"""
        self.W_xo = theano.shared(sample_weights(n_h, n_h))
        self.W_ho = theano.shared(sample_weights(n_h, n_h))
        self.W_co = theano.shared(sample_weights(n_h))

        self.params = [self.W, self.W_xi, self.W_hi, self.W_ci, self.W_xf, self.W_hf, self.W_cf,
                       self.W_xc, self.W_hc, self.W_xo, self.W_ho, self.W_co]
Example #18
    def __init__(self, n_in, n_h, activation=tanh):
        self.activation = activation

        self.W_xi = theano.shared(sample_weights(n_in, n_h))
        self.W_hi = theano.shared(sample_weights(n_h, n_h))
        self.W_ci = theano.shared(sample_weights(n_h))

        self.W_xf = theano.shared(sample_weights(n_in, n_h))
        self.W_hf = theano.shared(sample_weights(n_h, n_h))
        self.W_cf = theano.shared(sample_weights(n_h))

        self.W_xc = theano.shared(sample_weights(n_in, n_h))
        self.W_hc = theano.shared(sample_weights(n_h, n_h))

        self.W_xo = theano.shared(sample_weights(n_in, n_h))
        self.W_ho = theano.shared(sample_weights(n_h, n_h))
        self.W_co = theano.shared(sample_weights(n_h))

        self.params = [
            self.W_xi, self.W_hi, self.W_ci, self.W_xf, self.W_hf, self.W_cf,
            self.W_xc, self.W_hc, self.W_xo, self.W_ho, self.W_co
        ]
Example #19
    def __init__(self, n_i, n_h):
        self.W = theano.shared(sample_weights(n_i, n_h))
        self.W_t = theano.shared(sample_weights(n_h, n_h))
        self.BOS = theano.shared(sample_weights(n_h))
        self.params = [self.W, self.W_t, self.BOS]
Example #20
    def __init__(self, x_span, x_word, x_ctx, x_dist, y, init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; elem=distance between sentences of ant and ment
        :param y     : 1D: batch
        """

        self.input  = [x_span, x_word, x_ctx, x_dist, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx  = x_ctx
        self.x_dist = x_dist
        self.y      = y

        dim_x = dim_w * (2 + 4 + 20) + 1
        batch = y.shape[0]

        """ Params """
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab, dim_w))
        else:
            self.emb = theano.shared(init_emb)

        self.W_d = theano.shared(sample_weights(dim_d))
        self.W_i = theano.shared(sample_weights(dim_x, dim_h*3))
        self.W_h = theano.shared(sample_weights(dim_h*3, dim_h))
        self.W_o = theano.shared(sample_weights(dim_h))
        self.params = [self.W_d, self.W_i, self.W_h, self.W_o]

        """ Input Layer """
        x_s = self.emb[x_span]     # 1D: batch, 2D: limit * 2,      3D: dim_w
        x_w = self.emb[x_word]     # 1D: batch, 2D: 4,              3D: dim_w
        x_c = self.emb[x_ctx]      # 1D: batch, 2D: window * 2 * 2, 3D: dim_w
        x_d = self.W_d[x_dist]     # 1D: batch
        x_s_avg = T.concatenate([T.mean(x_s[:, :x_s.shape[1]//2], 1), T.mean(x_s[:, x_s.shape[1]//2:], 1)], 1)
        x = T.concatenate([x_s_avg, x_w.reshape((batch, -1)), x_c.reshape((batch, -1)), x_d.reshape((batch, 1))], 1)

        """ Intermediate Layers """
        h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
        h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

        """ Output Layer """
        p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

        """ Predicts """
        self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], dtype=theano.config.floatX))
        self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
        self.y_hat_index = T.argmax(p_y)
        self.p_y_hat = p_y[self.y_hat_index]

        """ Cost Function """
        self.nll = - T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

        """ Update """
        self.grad = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.grad)

        """ Check Results """
        self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
        self.total_p = T.sum(self.y_hat, 0)
        self.total_r = T.sum(y, keepdims=True)
        self.correct = T.sum(self.result, 0)
        self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
Example #21
    def __init__(self, n_i, n_h, activation=tanh):
        self.activation = activation
        self.c0 = build_shared_zeros(n_h)
        self.h0 = self.activation(self.c0)

        self.W = theano.shared(sample_weights(n_i, n_h))

        """input gate parameters"""
        self.W_xi = theano.shared(sample_weights(n_h, n_h))
        self.W_hi = theano.shared(sample_weights(n_h, n_h))
        self.W_ci = theano.shared(sample_weights(n_h))

        """forget gate parameters"""
        self.W_xf = theano.shared(sample_weights(n_h, n_h))
        self.W_hf = theano.shared(sample_weights(n_h, n_h))
        self.W_cf = theano.shared(sample_weights(n_h))

        """cell parameters"""
        self.W_xc = theano.shared(sample_weights(n_h, n_h))
        self.W_hc = theano.shared(sample_weights(n_h, n_h))

        """output gate parameters"""
        self.W_xo = theano.shared(sample_weights(n_h, n_h))
        self.W_ho = theano.shared(sample_weights(n_h, n_h))
        self.W_co = theano.shared(sample_weights(n_h))

        self.params = [self.W, self.W_xi, self.W_hi, self.W_ci, self.W_xf, self.W_hf, self.W_cf,
                       self.W_xc, self.W_hc, self.W_xo, self.W_ho, self.W_co]
Example #22
    def create_posit_emb(n_posit, dim_posit):
        return theano.shared(sample_weights(n_posit, dim_posit))
Example #23
    def __init__(self, n_i):
        self.W = theano.shared(sample_weights(n_i * 2, n_i))
        self.W_a = theano.shared(sample_weights(n_i, n_i))
        self.W_p = theano.shared(sample_weights(n_i, n_i))
        self.params = [self.W_a, self.W_p, self.W]
Example #24
    def create_word_emb(n_vocab, init_emb, dim_emb):
        if init_emb is None:
            return theano.shared(sample_weights(n_vocab - 1, dim_emb))
        return theano.shared(init_emb)
Example #25
    def __init__(self, n_i, n_h, activation=tanh):
        self.activation = activation
        self.c0 = build_shared_zeros(n_h)
        self.h0 = self.activation(self.c0)

        self.W = theano.shared(sample_weights(n_i, n_h))

        # input gate parameters
        self.W_xi = theano.shared(sample_weights(n_h, n_h))
        self.W_hi = theano.shared(sample_weights(n_h, n_h))
        self.W_ci = theano.shared(sample_weights(n_h))

        # forget gate parameters
        self.W_xf = theano.shared(sample_weights(n_h, n_h))
        self.W_hf = theano.shared(sample_weights(n_h, n_h))
        self.W_cf = theano.shared(sample_weights(n_h))

        # cell parameters
        self.W_xc = theano.shared(sample_weights(n_h, n_h))
        self.W_hc = theano.shared(sample_weights(n_h, n_h))

        # output gate parameters
        self.W_xo = theano.shared(sample_weights(n_h, n_h))
        self.W_ho = theano.shared(sample_weights(n_h, n_h))
        self.W_co = theano.shared(sample_weights(n_h))

        self.params = [
            self.W, self.W_xi, self.W_hi, self.W_ci, self.W_xf, self.W_hf,
            self.W_cf, self.W_xc, self.W_hc, self.W_xo, self.W_ho, self.W_co
        ]
Example #26
    def create_emb(dim_row, dim_column):
        return theano.shared(sample_weights(dim_row, dim_column))
Example #27
    def __init__(self, x_span, x_word, x_ctx, x_dist, x_slen, y, init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; 2D: 2; elem=[sent dist, ment dist]
        :param x_slen: 1D: batch; 2D: 3; elem=[m_span_len, a_span_len, head_match]
        :param y     : 1D: batch
        """

        self.input  = [x_span, x_word, x_ctx, x_dist, x_slen, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx  = x_ctx
        self.x_dist = x_dist
        self.x_slen = x_slen
        self.y      = y

        dim_x = dim_w * (10 + 4 + 4 + 2 + 3)
        batch = y.shape[0]

        """ Params """
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab, dim_w))
        else:
            self.emb = theano.shared(init_emb)

        self.W_d = theano.shared(sample_weights(dim_d, dim_w))
        self.W_l = theano.shared(sample_weights(7, dim_w))
        self.W_i = theano.shared(sample_weights(dim_x, dim_h))
        self.W_h = theano.shared(sample_weights(dim_h, dim_h))
        self.W_o = theano.shared(sample_weights(dim_h))
        self.params = [self.W_d, self.W_l, self.W_i, self.W_h, self.W_o]

        """ Input Layer """
        x_vec = T.concatenate([x_span, x_word, x_ctx], 1).flatten()  # 1D: batch * (limit * 2 + 4 + 20)
        x_in = self.emb[x_vec]     # 1D: batch, 2D: limit * 2, 3D: dim_w
        x_d = self.W_d[x_dist]     # 1D: batch, 2D: 2, 3D: dim_w
        x_l = self.W_l[x_slen]     # 1D: batch, 2D: 3, 3D: dim_w
        x = T.concatenate([x_in.reshape((batch, -1)), x_d.reshape((batch, -1)), x_l.reshape((batch, -1))], 1)

        """ Intermediate Layers """
        h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
        h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

        """ Output Layer """
        p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

        """ Cost Function """
        self.nll = - T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

        """ Update """
        self.updates = sgd(self.cost, self.params, self.emb, x_in)

        """ Predicts """
        self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], dtype=theano.config.floatX))
        self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
        self.y_hat_index = T.argmax(p_y)
        self.p_y_hat = p_y[self.y_hat_index]

        """ Check Results """
        self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
        self.total_p = T.sum(self.y_hat, 0)
        self.total_r = T.sum(y, keepdims=True)
        self.correct = T.sum(self.result, 0)
        self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
Example #28
    def __init__(self, x_span, x_word, x_ctx, x_dist, y, init_emb, n_vocab,
                 dim_w, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; elem=distance between sentences of ant and ment
        :param y     : 1D: batch
        """

        self.input = [x_span, x_word, x_ctx, x_dist, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx = x_ctx
        self.x_dist = x_dist
        self.y = y

        dim_x = dim_w * (2 + 4 + 20) + 1
        batch = y.shape[0]
        """ Params """
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab, dim_w))
        else:
            self.emb = theano.shared(init_emb)

        self.W_d = theano.shared(sample_weights(dim_d))
        self.W_i = theano.shared(sample_weights(dim_x, dim_h * 3))
        self.W_h = theano.shared(sample_weights(dim_h * 3, dim_h))
        self.W_o = theano.shared(sample_weights(dim_h))
        self.params = [self.W_d, self.W_i, self.W_h, self.W_o]
        """ Input Layer """
        x_s = self.emb[x_span]  # 1D: batch, 2D: limit * 2,      3D: dim_w
        x_w = self.emb[x_word]  # 1D: batch, 2D: 4,              3D: dim_w
        x_c = self.emb[x_ctx]  # 1D: batch, 2D: window * 2 * 2, 3D: dim_w
        x_d = self.W_d[x_dist]  # 1D: batch
        x_s_avg = T.concatenate([
            T.mean(x_s[:, :x_s.shape[1] // 2], 1),
            T.mean(x_s[:, x_s.shape[1] // 2:], 1)
        ], 1)
        x = T.concatenate([
            x_s_avg,
            x_w.reshape((batch, -1)),
            x_c.reshape((batch, -1)),
            x_d.reshape((batch, 1))
        ], 1)
        """ Intermediate Layers """
        h1 = relu(T.dot(x, self.W_i))  # h1: 1D: batch, 2D: dim_h
        h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h
        """ Output Layer """
        p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch
        """ Predicts """
        self.thresholds = theano.shared(
            np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                       dtype=theano.config.floatX))
        self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
        self.y_hat_index = T.argmax(p_y)
        self.p_y_hat = p_y[self.y_hat_index]
        """ Cost Function """
        self.nll = -T.sum(y * T.log(p_y) + (1. - y) * T.log(1. - p_y))  # TODO: ranking criterion
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2
        """ Update """
        self.grad = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.grad)
        """ Check Results """
        self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
        self.total_p = T.sum(self.y_hat, 0)
        self.total_r = T.sum(y, keepdims=True)
        self.correct = T.sum(self.result, 0)
        self.correct_t, self.correct_f = correct_tf(self.result,
                                                    y.reshape((y.shape[0], 1)))
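Examples #20 and #28 both threshold p_y at nine cut-offs through a binary_predict method that is not shown. A minimal sketch consistent with the shape comments (1D: batch, 2D: 9 thresholds), assuming a plain broadcast comparison:

def binary_predict(self, p_y):
    # Compare each probability against all nine thresholds at once;
    # the result is 1D: batch, 2D: 9, elem in {0, 1}.
    return T.gt(p_y.dimshuffle(0, 'x'), self.thresholds.dimshuffle('x', 0))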