Example #1
		def _step(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc):
			i = sigmoid(T.dot(x_t, Wi) + T.dot(ht_1, Whi) + bi)
			f = sigmoid(T.dot(x_t, Wf) + T.dot(ht_1, Whf) + bf)
			o = sigmoid(T.dot(x_t, Wo) + T.dot(ht_1, Who) + bo)
			c = tanh(T.dot(x_t, Wc) + T.dot(ht_1, Whc) + bc)
			c_new = i * c + f * ct_1
			h_new = o * tanh(c_new)
			return c_new, h_new
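The step functions in these examples follow the theano.scan calling convention: sequence slices first, then the recurrent outputs, then the non-sequences. A minimal sketch (not taken from any of the repositories above; the name lstm_scan and the argument layout are assumptions) of how a step like this is typically driven:

import theano
import theano.tensor as T
from theano.tensor import tanh
from theano.tensor.nnet import sigmoid

def lstm_scan(x_seq, c0, h0, params):
    # params: [Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc]
    (c_seq, h_seq), updates = theano.scan(
        fn=_step,
        sequences=[x_seq],        # yields x_t, one slice per time step
        outputs_info=[c0, h0],    # ct_1 and ht_1 carried between steps
        non_sequences=params)     # weights and biases stay fixed
    return c_seq, h_seq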
Example #2
 def _step(x_t, ct_1, ht_1, W, Wh, b, dim):
     tmp = T.dot(x_t, W) + T.dot(ht_1, Wh) + b
     i = sigmoid(_slice(tmp, 0, dim))
     f = sigmoid(_slice(tmp, 1, dim))
     o = sigmoid(_slice(tmp, 2, dim))
     c = tanh(_slice(tmp, 3, dim))
     c_new = i * c + f * ct_1
     h_new = o * tanh(c_new)
     return c_new, h_new
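Examples #2, #4, #7 and #9 keep the four gate blocks in one fused weight matrix and rely on a _slice helper that is not shown there. A common definition (a sketch; the original repositories' versions may differ) is:

def _slice(_x, n, dim):
    # take the n-th block of width `dim` along the last axis
    if _x.ndim == 3:
        return _x[:, :, n * dim:(n + 1) * dim]
    return _x[:, n * dim:(n + 1) * dim]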
Example #3
 def _step(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf,
           bo, bc):
     i = sigmoid(T.dot(x_t, Wi) + T.dot(ht_1, Whi) + bi)
     f = sigmoid(T.dot(x_t, Wf) + T.dot(ht_1, Whf) + bf)
     o = sigmoid(T.dot(x_t, Wo) + T.dot(ht_1, Who) + bo)
     c = tanh(T.dot(x_t, Wc) + T.dot(ht_1, Whc) + bc)
     c_new = i * c + f * ct_1
     h_new = o * tanh(c_new)
     return c_new, h_new
Example #4
		def _step(x_t, ct_1, ht_1, W, Wh, b, dim):
			tmp = T.dot(x_t, W) + T.dot(ht_1, Wh) + b
			i = sigmoid(_slice(tmp, 0, dim))
			f = sigmoid(_slice(tmp, 1, dim))
			o = sigmoid(_slice(tmp, 2, dim))
			c = tanh(_slice(tmp, 3, dim))
			c_new = i * c + f * ct_1
			h_new = o * tanh(c_new)
			return c_new, h_new
Example #5
		def _step_index(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc):
			# x_t: array of type int32
			# use indexing on Wi, Wf, Wo and Wc matrices instead of computing the product with the one-hot representation of the input for computational and memory efficiency
			i = sigmoid(Wi[x_t] + T.dot(ht_1, Whi) + bi)
			f = sigmoid(Wf[x_t] + T.dot(ht_1, Whf) + bf)
			o = sigmoid(Wo[x_t] + T.dot(ht_1, Who) + bo)
			c = tanh(Wc[x_t] + T.dot(ht_1, Whc) + bc)
			c_new = i * c + f * ct_1
			h_new = o * tanh(c_new)
			return c_new, h_new
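The comment above relies on the fact that multiplying a one-hot row by a weight matrix selects the corresponding row. A tiny NumPy check of that equivalence (illustration only, not part of the repository):

import numpy as np

W = np.arange(12.0).reshape(4, 3)          # vocab_size=4, dim=3
x_t = np.array([2, 0], dtype='int32')      # two token indices
one_hot = np.eye(4)[x_t]                   # (2, 4) one-hot rows
assert np.allclose(one_hot.dot(W), W[x_t])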
Example #6
    def backward_pass(self, y_true, d_next, cache):
        Wsx, Wsh, bs, \
        Wix, Wih, bi, \
        Wfx, Wfh, bf, \
        Wox, Woh, bo, \
        Why, by = self.get_weights_and_biases()

        # unpacking state variables from [t + 1]
        dh_next, ds_next = d_next

        # recovering variables from forward pass
        x, h, h_old, s, s_old, s_bar, i, f, o, y, prob = cache

        # ~ output as probabilities
        dy = np.copy(prob)
        dy[y_true] -= 1

        # ~ output
        dWhy = dy @ h.T
        dby = dy

        # ~ hidden state
        delta = Why.T @ dy
        dh = dh_next + delta

        # ~ state
        ds = dh * o * (1 - u.tanh(s)**2) + ds_next
        ds_bar = ds * i * (1 - s_bar**2)

        # ~ gates
        di = ds * s_bar * (i * (1 - i))
        df = ds * s_old * (f * (1 - f))
        do = dh * u.tanh(s) * (o * (1 - o))

        # calculating gradients
        dh_acc = 0
        grad = dict(Why=dWhy, by=dby)

        for d, W, dWx, dWh, db in zip([di, df, do, ds_bar],
                                      [Wih, Wfh, Woh, Wsh],
                                      ['Wix', 'Wfx', 'Wox', 'Wsx'],
                                      ['Wih', 'Wfh', 'Woh', 'Wsh'],
                                      ['bi', 'bf', 'bo', 'bs']):
            grad[dWx] = d @ x.T
            grad[dWh] = d @ h_old.T
            grad[db] = d
            dh_acc += W.T @ d

        # saving current derivatives for [t - 1]
        dh_next = dh_acc
        ds_next = ds * f
        state = (dh_next, ds_next)

        return grad, state
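backward_pass calls into a small utilities module `u` that is not part of the example. Hypothetical NumPy stand-ins for the helpers it (and the matching forward_pass in Example #25) uses:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def softmax(x):
    e = np.exp(x - np.max(x))   # shift for numerical stability
    return e / e.sum()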
Example #7
 def _step_index(x_t, ct_1, ht_1, W, Wh, b, dim):
     # x_t: array of type int32
     # use indexing on W matrix instead of computing dot product with the one-hot representation of the input for computational and memory efficiency
     tmp = W[x_t] + T.dot(ht_1, Wh) + b
     i = sigmoid(_slice(tmp, 0, dim))
     f = sigmoid(_slice(tmp, 1, dim))
     o = sigmoid(_slice(tmp, 2, dim))
     c = tanh(_slice(tmp, 3, dim))
     c_new = i * c + f * ct_1
     h_new = o * tanh(c_new)
     return c_new, h_new
Example #8
 def _step_index(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc,
                 bi, bf, bo, bc):
     # x_t: array of type int32
     # use indexing on Wi, Wf, Wo and Wc matrices instead of computing the product with the one-hot representation of the input for computational and memory efficiency
     i = sigmoid(Wi[x_t] + T.dot(ht_1, Whi) + bi)
     f = sigmoid(Wf[x_t] + T.dot(ht_1, Whf) + bf)
     o = sigmoid(Wo[x_t] + T.dot(ht_1, Who) + bo)
     c = tanh(Wc[x_t] + T.dot(ht_1, Whc) + bc)
     c_new = i * c + f * ct_1
     h_new = o * tanh(c_new)
     return c_new, h_new
Example #9
		def _step_index(x_t, ct_1, ht_1, W, Wh, b, dim):
			# x_t: array of type int32
			# use indexing on W matrix instead of computing dot product with the one-hot representation of the input for computational and memory efficiency
			tmp = W[x_t] + T.dot(ht_1, Wh) + b
			i = sigmoid(_slice(tmp, 0, dim))
			f = sigmoid(_slice(tmp, 1, dim))
			o = sigmoid(_slice(tmp, 2, dim))
			c = tanh(_slice(tmp, 3, dim))
			c_new = i * c + f * ct_1
			h_new = o * tanh(c_new)
			return c_new, h_new
Example #10
    def forward(self, input_data, h_prev, C_prev):
        z = np.row_stack((h_prev, input_data))
        f = utils.sigmoid(np.dot(self.W_f.v, z) + self.b_f.v)
        i = utils.sigmoid(np.dot(self.W_i.v, z) + self.b_i.v)
        C_bar = utils.tanh(np.dot(self.W_C.v, z) + self.b_C.v)

        C = f * C_prev + i * C_bar
        o = utils.sigmoid(np.dot(self.W_o.v, z) + self.b_o.v)
        h = o * utils.tanh(C)

        v = np.dot(self.W_v.v, h) + self.b_v.v
        y = np.exp(v) / np.sum(np.exp(v))  #softmax
        return z, f, i, C_bar, C, o, h, v, y
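A hedged usage sketch for the cell above, stepping it over a sequence of one-hot column vectors; cell, hidden_size, vocab_size and token_indices are assumed names, not part of the original class:

import numpy as np

h = np.zeros((hidden_size, 1))
C = np.zeros((hidden_size, 1))
for idx in token_indices:
    x = np.zeros((vocab_size, 1))
    x[idx] = 1.0
    z, f, i, C_bar, C, o, h, v, y = cell.forward(x, h, C)
    # y is the softmax distribution over the next token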
Example #11
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid", "tanh" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """

    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.sigmoid(Z)

    elif activation == "tanh":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.tanh(Z)

    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.relu(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])

    cache = (linear_cache, activation_cache)
    return A, cache
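linear_activation_forward assumes a linear_forward helper and activation functions that return a (value, cache) pair. A sketch of the linear part under that convention (the actual implementation lives elsewhere in the repository):

def linear_forward(A, W, b):
    Z = np.dot(W, A) + b    # shape: (size of current layer, number of examples)
    cache = (A, W, b)       # kept for the backward pass
    return Z, cache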
Example #12
 def expmap(self, u, p, c):
     sqrt_c = c**0.5
     u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
     second_term = (tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) * u /
                    (sqrt_c * u_norm))
     gamma_1 = self.mobius_add(p, second_term, c)
     return gamma_1
Example #13
 def classify(self, x):
     # Add the bias for all inputs
     data_set = np.concatenate((np.ones(1).T, np.array(x)), axis=0)
     for layer in range(0, len(self.weights)):
         sum_value = np.dot(data_set, self.weights[layer])
         data_set = utils.tanh(sum_value)
     return data_set
Example #14
 def expmap0(self, u, c):
     sqrt_c = c**0.5
     u_norm = torch.clamp_max(
         torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm),
         self.max_norm)
     gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
     return gamma_1
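expmap0 maps a tangent vector at the origin onto the Poincaré ball of curvature -c. For orientation, a sketch of the matching inverse map under the same conventions (an assumption about the surrounding class, not code from this example):

def logmap0(self, p, c):
    sqrt_c = c ** 0.5
    p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
    # inverts tanh(sqrt_c * ||u||) * u / (sqrt_c * ||u||)
    return artanh(sqrt_c * p_norm) * p / (sqrt_c * p_norm)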
Example #15
        def _step(
                m_,
                x_,  # sequences
                h_,
                c_,  # outputs_info
                pctx_,
                context,
                Wd_att,
                U_att,
                c_att,
                W_sel,
                b_sel,
                U,  # non_sequences
                dp_=None,
                dp_att_=None):

            preact = tensor.dot(h_, U)
            preact += x_
            # preact += tensor.dot(ctx_, Wc)

            i = _slice(preact, 0, dim)
            f = _slice(preact, 1, dim)
            o = _slice(preact, 2, dim)
            if options['use_dropout']:
                i = i * _slice(dp_, 0, dim)
                f = f * _slice(dp_, 1, dim)
                o = o * _slice(dp_, 2, dim)
            i = tensor.nnet.sigmoid(i)
            f = tensor.nnet.sigmoid(f)
            o = tensor.nnet.sigmoid(o)
            c = tensor.tanh(_slice(preact, 3, dim))

            c = f * c_ + i * c
            c = m_[:, None] * c + (1. - m_)[:, None] * c_

            h = o * tensor.tanh(c)
            h = m_[:, None] * h + (1. - m_)[:, None] * h_

            # attention
            pstate_ = tensor.dot(h, Wd_att)
            pctx_ = pctx_ + pstate_[:, None, :]
            pctx_ = tanh(pctx_)

            alpha = tensor.dot(pctx_, U_att) + c_att
            alpha_pre = alpha
            alpha_shp = alpha.shape
            alpha = tensor.nnet.softmax(
                alpha.reshape([alpha_shp[0], alpha_shp[1]]))  # softmax
            ctx_ = (context * alpha[:, :, None]).sum(1)  # (m, ctx_dim)
            if options['selector']:
                sel_ = tensor.nnet.sigmoid(tensor.dot(h_, W_sel) + b_sel)
                sel_ = sel_.reshape([sel_.shape[0]])
                ctx_ = sel_[:, None] * ctx_

            rval = [
                h, c, alpha, ctx_, sel_, pstate_, pctx_, i, f, o, preact,
                alpha_pre
            ]
            return rval
Example #16
    def training(self, data_set, correct_output, n=0.2, epochs=1000):
        """
        Trains the NeuralNetwork with the data set.
        Args:
            data_set: matrix with the vectors containg the inputs.
            correct_output: the expected output for each training.
            n: the learning rate.
            epochs: number of times that the network run all the data set.
        """

        # File to write error history
        f = open("graphics/error_output.txt", "w")
        data_set = self.insert_bias(data_set)
        last_errors = []

        for epoch in range(epochs):
            if epoch % 1000 == 0:
                print "Epoch: {}".format(epoch)
            random_index = np.random.randint(data_set.shape[0])

            # layer_data: [w0, w1, w2, output]
            layer_data = [data_set[random_index]]

            # Calculate output for hidden layers
            for layer in range(len(self.weights)):
                dot_value = np.dot(layer_data[layer], self.weights[layer])
                activation = utils.tanh(dot_value)
                layer_data.append(activation)
            # layer_data now contains: [ [outputs from input_layer(inputs)],
            # [outputs from hidden layer(s)], [output from output layer] ]

            # Calculate the error for output layer
            error = correct_output[random_index] - layer_data[-1]
            average_error = abs(np.average(error))
            last_errors.append(average_error)
            if len(last_errors) == 10:
                last_errors_average = np.average(last_errors)
                f.write("{} {}\n".format(epoch, last_errors_average))
                if last_errors_average < 0.001:
                    print last_errors_average
                    break
                last_errors = []
            deltas = [error * utils.dtanh(layer_data[-1])]

            # Calculate Deltas
            for l in range(len(layer_data) - 2, 0, -1):
                deltas.append(
                    deltas[-1].dot(self.weights[l].T)*utils.dtanh(layer_data[l])
                )
            deltas.reverse()

            # Backpropagate. Update the weights for all the layers
            for i in range(len(self.weights)):
                layer = np.atleast_2d(layer_data[i])
                delta = np.atleast_2d(deltas[i])
                self.weights[i] += n * layer.T.dot(delta)

        f.close()
Example #17
 def output(self):
     """
     Generate output of this layer
     """
     self.x = self.prev_layer.output()
     if self.theta is None:  # assumes theta starts as None; truth-testing a filled ndarray would raise
         self.theta = self.rng.uniform(size=(self.n_neuron, len(self.x)))
         self.b = self.rng.uniform(size=(self.n_neuron, ))
     return tanh(numpy.dot(self.theta, self.x) + self.b)
Example #18
 def output(self):
     """
     Generate output of this layer
     """
     self.x = self.prev_layer.output()
     if self.theta is None:  # assumes theta starts as None; truth-testing a filled ndarray would raise
         self.theta = self.rng.uniform(size=(self.n_neuron, len(self.x)))
         self.b = self.rng.uniform(size=(self.n_neuron,))
     return tanh(numpy.dot(self.theta, self.x) + self.b)
Example #19
 def mobius_matvec(self, m, x, c):
     sqrt_c = c ** 0.5
     x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
     mx = x @ m.transpose(-1, -2)
     mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
     res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c)
     cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8)
     res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device)
     res = torch.where(cond, res_0, res_c)
     return res
Example #20
def lstm_numpy(x, W, U, b):
    z = numpy.dot(x, W) + b
    n_hidden = b.shape[0] // 4  # integer division so the shapes below are ints
    h = numpy.zeros((x.shape[0], n_hidden), dtype=x.dtype)
    prev_h = numpy.zeros(n_hidden, dtype=x.dtype)
    prev_c = numpy.zeros(n_hidden, dtype=x.dtype)
    
    def _slice(_x, n, dim):
        return _x[n*dim:(n+1) * dim]
    for n in range(len(h)):
        preact = numpy.dot(prev_h, U) + z[n]
        i = utils.sigmoid(_slice(preact, 0, n_hidden))
        f = utils.sigmoid(_slice(preact, 1, n_hidden))
        o = utils.sigmoid(_slice(preact, 2, n_hidden))
        c = utils.tanh(_slice(preact, 3, n_hidden))

        c = f * prev_c + i * c
        h[n] = o * utils.tanh(c)
        prev_c = c
        prev_h = h[n]
    return h
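A hedged usage sketch for lstm_numpy: the bias vector fixes the hidden size, and the fused weight matrices hold the four gate blocks side by side (the sizes here are illustrative only):

import numpy

n_in, n_hidden, n_steps = 8, 16, 5
rng = numpy.random.RandomState(0)
x = rng.randn(n_steps, n_in)
W = 0.1 * rng.randn(n_in, 4 * n_hidden)
U = 0.1 * rng.randn(n_hidden, 4 * n_hidden)
b = numpy.zeros(4 * n_hidden)
h = lstm_numpy(x, W, U, b)   # shape: (n_steps, n_hidden)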
Example #21
        def _step(x):
            # attention
            pstate = T.dot(x, Wd_att)
            pstate = pctx + pstate[:, None, :]
            pstate = tanh(pstate)  # n * f * ctx_dim

            alpha = T.dot(pstate, U_att)+c_att  # n * f * 1
            alpha_shp = alpha.shape
            alpha = T.nnet.softmax(alpha.reshape([alpha_shp[0], alpha_shp[1]]))  # softmax
            ctx_ = (context * alpha[:, :, None]).sum(1)  # (n, ctx_dim)
            rval = [alpha, ctx_]
            return rval
Example #22
    def backward(self, target, dh_next, dC_next, C_prev, z, f, i, C_bar, C, o,
                 h, v, y):
        # the following code still needs to be modified.
        # for example: p -> self
        dv = np.copy(y)
        dv[target] -= 1

        self.W_v.d += np.dot(dv, h.T)
        self.b_v.d += dv

        dh = np.dot(self.W_v.v.T, dv)
        dh += dh_next
        do = dh * utils.tanh(C)
        do = utils.dsigmoid(o) * do
        self.W_o.d += np.dot(do, z.T)
        self.b_o.d += do

        dC = np.copy(dC_next)
        dC += dh * o * utils.dtanh(utils.tanh(C))
        dC_bar = dC * i
        dC_bar = utils.dtanh(C_bar) * dC_bar
        self.W_C.d += np.dot(dC_bar, z.T)
        self.b_C.d += dC_bar

        di = dC * C_bar
        di = utils.dsigmoid(i) * di
        self.W_i.d += np.dot(di, z.T)
        self.b_i.d += di

        df = dC * C_prev
        df = utils.dsigmoid(f) * df
        self.W_f.d += np.dot(df, z.T)
        self.b_f.d += df

        dz = (np.dot(self.W_f.v.T, df) + np.dot(self.W_i.v.T, di) +
              np.dot(self.W_C.v.T, dC_bar) + np.dot(self.W_o.v.T, do))
        dh_prev = dz[:self.h_size, :]
        dC_prev = f * dC

        return dh_prev, dC_prev
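The derivative helpers used above take the already-activated values, as the call sites show (dsigmoid receives o, i and f; dtanh receives tanh(C) and C_bar). Hypothetical stand-ins consistent with that convention:

def dsigmoid(y):
    # y is sigmoid(x), so d/dx sigmoid(x) = y * (1 - y)
    return y * (1.0 - y)

def dtanh(y):
    # y is tanh(x), so d/dx tanh(x) = 1 - y**2
    return 1.0 - y ** 2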
Example #23
 def output(self):
     """
     Generate output of this layer
     """
     self.x_imgs = self.prev_layer.output()
     return numpy.asarray(map(
         lambda i: tanh(self.b[i] + reduce(
             lambda res, j: res + conv2d(self.x_imgs[j], self.theta[i]),
             self.connections[i],
             0
         )),
         xrange(0, len(self.connections))
     ))
Example #24
def lstm_numpy(x, W, U, b):
    z = numpy.dot(x, W) + b
    n_hidden = b.shape[0] // 4  # integer division so the shapes below are ints
    h = numpy.zeros((x.shape[0], n_hidden), dtype=x.dtype)
    prev_h = numpy.zeros(n_hidden, dtype=x.dtype)
    prev_c = numpy.zeros(n_hidden, dtype=x.dtype)

    def _slice(_x, n, dim):
        return _x[n * dim:(n + 1) * dim]

    for n in range(len(h)):
        preact = numpy.dot(prev_h, U) + z[n]
        i = utils.sigmoid(_slice(preact, 0, n_hidden))
        f = utils.sigmoid(_slice(preact, 1, n_hidden))
        o = utils.sigmoid(_slice(preact, 2, n_hidden))
        c = utils.tanh(_slice(preact, 3, n_hidden))

        c = f * prev_c + i * c
        h[n] = o * utils.tanh(c)
        prev_c = c
        prev_h = h[n]
    return h
Example #25
    def forward_pass(self, x_index, state):
        Wsx, Wsh, bs, \
        Wix, Wih, bi, \
        Wfx, Wfh, bf, \
        Wox, Woh, bo, \
        Why, by = self.get_weights_and_biases()

        h_old, s_old = state

        # ~ input vector
        x = np.zeros((self.V, 1))
        x[x_index] = 1.0

        # ~ gates
        i = u.sigmoid(Wix @ x + Wih @ h_old + bi)
        o = u.sigmoid(Wox @ x + Woh @ h_old + bo)
        f = u.sigmoid(Wfx @ x + Wfh @ h_old + bf)

        # ~ state
        s_bar = u.tanh(Wsx @ x + Wsh @ h_old + bs)
        s = f * s_old + i * s_bar

        # ~ hidden state
        h = o * u.tanh(s)

        # ~ output
        y = Why @ h + by

        # ~ output as probabilities
        prob = u.softmax(y)

        # saving variables for backpropagation
        cache = (x, h, h_old, s, s_old, s_bar, i, f, o, y, prob)

        state = (h, s)

        return y, state, cache
Example #26
    def predict(self, input):
        L = np.shape(input)[0]
        az = np.zeros((L, self.Nhidden))
        ar = np.zeros((L, self.Nhidden))
        ahhat = np.zeros((L, self.Nhidden))
        ah = np.zeros((L, self.Nhidden))

        a1 = tanh(np.dot(input, self.w1) + self.b1)
        # first time step (index 0), with a zero initial hidden state
        x = np.concatenate((np.zeros((self.Nhidden)), a1[0, :]))
        az[0, :] = sigm(np.dot(x, self.wz) + self.bz)
        ar[0, :] = sigm(np.dot(x, self.wr) + self.br)
        ahhat[0, :] = tanh(np.dot(x, self.wh) + self.bh)
        ah[0, :] = az[0, :] * ahhat[0, :]

        for i in range(1, L):
            x = np.concatenate((ah[i - 1, :], a1[i, :]))
            az[i, :] = sigm(np.dot(x, self.wz) + self.bz)
            ar[i, :] = sigm(np.dot(x, self.wr) + self.br)
            x = np.concatenate((ar[i, :] * ah[i - 1, :], a1[i, :]))
            ahhat[i, :] = tanh(np.dot(x, self.wh) + self.bh)
            ah[i, :] = (1 - az[i, :]) * ah[i - 1, :] + az[i, :] * ahhat[i, :]

        a2 = tanh(np.dot(ah, self.w2) + self.b2)
        return [a1, az, ar, ahhat, ah, a2]
Example #27
    def _forward_propagation(self, X):
        W1 = self.params['W1']
        b1 = self.params['b1']
        W2 = self.params['W2']
        b2 = self.params['b2']

        Z1 = np.dot(W1, X) + b1
        A1 = tanh(Z1)
        Z2 = np.dot(W2, A1) + b2
        A2 = sigmoid(Z2)

        self.caches['Z1'] = Z1
        self.caches['A1'] = A1
        self.caches['Z2'] = Z2
        self.caches['A2'] = A2

        return A2
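A sketch of the backward pass that would pair with this forward propagation, assuming a binary cross-entropy loss and the same params/caches dictionaries (an illustration, not the repository's code):

def _backward_propagation(self, X, Y):
    m = X.shape[1]
    A1, A2 = self.caches['A1'], self.caches['A2']
    W2 = self.params['W2']

    dZ2 = A2 - Y                                  # sigmoid + cross-entropy
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    dZ1 = np.dot(W2.T, dZ2) * (1 - A1 ** 2)       # tanh'(Z1) = 1 - A1**2
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    return {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}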
Example #28
def linear_activation_forward(A_prev, W, b, activation):

    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A = ut.sigmoid(Z)

    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A = ut.relu(Z)

    elif activation == "tanh":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A = ut.tanh(Z)

    cache = (linear_cache, Z)

    return A, cache
Example #29
    def feedforward(self, x):
        """
        :param x: robot.sensor_values()
        :return: Vr,Vl
        """
        self.input = x[0]

        # hidden layer (input ~+ prev_hidden)
        self.input = np.round(self.input, 5)
        prev_values = np.round(self.layer_1_values[-1], 5)
        self.layer_1 = utils.sigmoid(np.dot(self.input, self.synapse_0) + np.dot(prev_values, self.synapse_h)) if RNN else utils.sigmoid(np.dot(self.input, self.synapse_0))

        # output layer
        self.layer_1 = np.round(self.layer_1, 5)
        self.output = utils.tanh(np.dot(self.layer_1, self.synapse_1))

        # store hidden layer so we can use it in the next time step
        self.layer_1_values.append(copy.deepcopy(self.layer_1))
        return np.round(self.output,5)
Example #30
 def step(prev, elems):
     # gather previous internal state and output state
     if options['use_dropout']:
         m_, x_, dp_ = elems
     else:
         m_, x_ = elems
     h_, c_, _, _, _ = prev
     preact = tf.matmul(h_, U, name="MatMul_preact")   # (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
     preact = preact + x_
     i = _slice(preact, 0, dim)  # (64,512)  (0-511) or (m,512) in sampling
     f = _slice(preact, 1, dim)  # (64,512)  (512,1023)  or (m,512) in sampling
     o = _slice(preact, 2, dim)  # (64,512)  (1024-1535) or (m,512) in sampling
     if options['use_dropout']:
         i = i * _slice(dp_, 0, dim)
         f = f * _slice(dp_, 1, dim)
         o = o * _slice(dp_, 2, dim)
     i = tf.sigmoid(i)
     f = tf.sigmoid(f)
     o = tf.sigmoid(o)
     c = tf.tanh(_slice(preact, 3, dim))  # (64,512)  (1024-1535)    or (m,512) in sampling
     c = f * c_ + i * c
     c = m_[:, None] * c + (1. - m_)[:, None] * c_   # (m,1)*(m,512) + (m,1)*(m,512) = (m,512) in sampling
     h = o * tf.tanh(c)  # (m,512)*(m,512) = (m,512) in sampling
     h = m_[:, None] * h + (1. - m_)[:, None] * h_
     # attention
     pstate_ = tf.matmul(h, Wd_att) # shape = (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
     pctx_t = pctx_ + pstate_[:, None, :] # shape = (64,28,2048)+(64,?,2048) = (64,28,2048)  # DOUBT pctx_ += ?? VERIFIED
         #   (1,28,2048) + (m,?,2048) = (m,28,2048)
     pctx_t = tanh(pctx_t)
     alpha = batch_matmul(pctx_t, U_att) + c_att    # (64,28,2048)*(2048,1) + (1,) = (64,28,1) or (m,28,1) in sampling
     alpha_pre = alpha
     alpha_shape = tf.shape(alpha)
     alpha = tf.nn.softmax(tf.reshape(alpha,[alpha_shape[0], alpha_shape[1]]))  # softmax (64,28) or (m,28) in sampling
     ctx_ = tf.reduce_sum((context * alpha[:, :, None]), 1)  # (m, ctx_dim)     # (64*28*2048)*(64,28,1).sum(1) = (64,2048) or (m,2048) in sampling
     if options['selector']:
         sel_ = tf.sigmoid(tf.matmul(h_, W_sel) + b_sel)   # (64,512)*(512,1)+(scalar) = (64,1) or (m,1) in sampling
         sel_shape = tf.shape(sel_)
         sel_ = tf.reshape(sel_,[sel_shape[0]])    # (64,) or (m,) in sampling
         ctx_ = sel_[:, None] * ctx_     # (64,1)*(64,2048) = (64,2048) or (m,2048) in sampling
     else:
         sel_ = tf.zeros(shape=(n_samples,), dtype=tf.float32)
     rval = [h, c, alpha, ctx_, sel_]
     return rval
Example #31
    def dis(self,x,training):
        x = tf.reshape(x,shape=[-1,self.shape,self.shape,3])
        scope = 'dis_'
        layer = lrelu(conv2d(x,self.weights[scope+'w_conv1'])+self.biases[scope+'b_conv1'])
        
        for i in range(1, 4):
            conv = prelu(conv2d(layer, self.weights[scope+'w_conv'+str(i+1)]) + self.biases[scope+'b_conv'+str(i+1)], scope+'w_conv'+str(i+1))
            conv = maxpool2d(conv)
            conv = tf.nn.dropout(conv, self.keep_rate)
            layer = conv

        fc = tf.reshape(layer,[-1, int(self.shape/8)*int(self.shape/8)*256])
        fc = lrelu(tf.matmul(fc,self.weights[scope+'w_fc'])+self.biases[scope+'b_fc'])
        fc = tf.nn.dropout(fc,self.keep_rate)
        
        output = tf.matmul(fc,self.weights[scope+'out'])+self.biases[scope+'out']
        output = (tanh(output)+1.0)*0.5

        return output
Example #32
        def _step(m_, x_, # sequences
                  h_, c_, a_, ctx_, # outputs_info
                  dp_=None # non_sequences
                  ):
            # attention
            pstate_ = T.dot(h_, Wd_att)
            pstate_ = pctx_ + pstate_[:,None,:]
            pstate_ = tanh(pstate_)

            alpha = T.dot(pstate_, U_att)+c_att
            alpha_shp = alpha.shape
            alpha = T.nnet.softmax(alpha.reshape([alpha_shp[0], alpha_shp[1]])) # softmax
            ctx_ = (context * alpha[:, :, None]).sum(1) # (m, ctx_dim)
            if options['selector']:
                sel_ = T.nnet.sigmoid(T.dot(h_, W_sel) + b_sel)
                sel_ = sel_.reshape([sel_.shape[0]])
                ctx_ = sel_[:,None] * ctx_
            preact = T.dot(h_, U)
            preact += x_
            preact += T.dot(ctx_, Wc)

            i = _slice(preact, 0, dim)
            f = _slice(preact, 1, dim)
            o = _slice(preact, 2, dim)
            if options['use_dropout']:
                i *= _slice(dp_, 0, dim)
                f *= _slice(dp_, 1, dim)
                o *= _slice(dp_, 2, dim)
            i = T.nnet.sigmoid(i)
            f = T.nnet.sigmoid(f)
            o = T.nnet.sigmoid(o)
            c = T.tanh(_slice(preact, 3, dim))

            c = f * c_ + i * c
            c = m_[:, None] * c + (1. - m_)[:, None] * c_

            h = o * T.tanh(c)
            h = m_[:, None] * h + (1. - m_)[:, None] * h_
            rval = [h, c, alpha, ctx_]
            return rval
Example #33
                            tuple(pgn.read_game(f).mainline())).board(),
                        limit=engine.Limit(time=.1),
                        info=engine.INFO_SCORE)
                except AttributeError:
                    break
                for kwd, x in zip(
                        kwds.values(),
                    (bitboard(board, dtype=int),
                     moves.index(
                         (play_result.move if board.turn else chess.Move(
                             *(len(chess.SQUARES) - np.array(
                                 (play_result.move.from_square,
                                  play_result.move.to_square)) - 1),
                             promotion=play_result.move.promotion)).uci()),
                     tanh(play_result.info["score"].relative.score(
                         mate_score=7625),
                          k=.0025))):
                    kwd.append(x)
            except (AttributeError, IndexError, ValueError):
                continue
            if checkpoint and not len(kwds["X"]) % checkpoint:
                savez()
        savez()
        await uci_protocol.quit()


async def main() -> None:
    semaphore = asyncio.Semaphore(value=3)
    await asyncio.gather(
        *(synchronize(semaphore)(fetch)(file, checkpoint=10000)
          for file in glob.glob(_path("../data/*.pgn"))))
Example #34
    def build_sampler(self,
                      tfparams,
                      options,
                      use_noise,
                      ctx0,
                      ctx_mask,
                      x,
                      bo_init_state_sampler,
                      to_init_state_sampler,
                      bo_init_memory_sampler,
                      to_init_memory_sampler,
                      mode=None):
        # ctx: # frames x ctx_dim
        ctx_ = ctx0
        counts = tf.reduce_sum(ctx_mask, axis=-1)  # scalar

        ctx = ctx_
        ctx_mean = tf.reduce_sum(ctx, axis=0) / counts  # (2048,)
        ctx = tf.expand_dims(ctx, 0)  # (1,28,2048)

        # initial state/cell
        bo_init_state = self.layers.get_layer('ff')[1](tfparams,
                                                       ctx_mean,
                                                       options,
                                                       prefix='ff_state',
                                                       activ='tanh')  # (512,)
        bo_init_memory = self.layers.get_layer('ff')[1](tfparams,
                                                        ctx_mean,
                                                        options,
                                                        prefix='ff_memory',
                                                        activ='tanh')  # (512,)
        to_init_state = tf.zeros(
            shape=(options['lstm_dim'], ),
            dtype=tf.float32)  # DOUBT : constant or not? # (512,)
        to_init_memory = tf.zeros(shape=(options['lstm_dim'], ),
                                  dtype=tf.float32)  # (512,)
        init_state = [bo_init_state, to_init_state]
        init_memory = [bo_init_memory, to_init_memory]

        print 'building f_init...',
        f_init = [ctx0] + init_state + init_memory
        print 'done'

        init_state = [bo_init_state_sampler, to_init_state_sampler]
        init_memory = [bo_init_memory_sampler, to_init_memory_sampler]

        # # if it's the first word, embedding should be all zero
        emb = tf.cond(
            tf.reduce_any(x[:, None] < 0), lambda: tf.zeros(
                shape=(1, tfparams['Wemb'].shape[1]), dtype=tf.float32),
            lambda: tf.nn.embedding_lookup(tfparams['Wemb'], x))  # (m,512)

        bo_lstm = self.layers.get_layer('lstm_cond')[1](
            tfparams,
            emb,
            options,
            prefix='bo_lstm',
            mask=None,
            context=ctx,
            context_mean=tf.expand_dims(ctx_mean, 0),
            one_step=True,
            init_state=init_state[0],
            init_memory=init_memory[0],
            use_noise=use_noise,
            mode=mode)
        to_lstm = self.layers.get_layer('lstm')[1](tfparams,
                                                   bo_lstm[0],
                                                   mask=None,
                                                   one_step=True,
                                                   init_state=init_state[1],
                                                   init_memory=init_memory[1],
                                                   prefix='to_lstm')
        next_state = [bo_lstm[0], to_lstm[0]]
        next_memory = [bo_lstm[1], to_lstm[0]]

        bo_lstm_h = bo_lstm[0]  # (1,512)
        to_lstm_h = to_lstm[0]  # (1,512)
        alphas = bo_lstm[2]  # (1,28)
        ctxs = bo_lstm[3]  # (1,2048)
        betas = bo_lstm[4]  # (1,)
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise)
        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](
            tfparams, bo_lstm_h, options, prefix='ff_logit_bo',
            activ='linear')  # (1,512)*(512,512) = (1,512)
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            to_lstm_h *= (1 - betas[:, None])  # (1,512)*(1,1) = (1,512)
            ctxs_beta = self.layers.get_layer('ff')[1](
                tfparams, ctxs, options, prefix='ff_logit_ctx',
                activ='linear')  # (1,2048)*(2048,512) = (1,512)
            ctxs_beta += self.layers.get_layer('ff')[1](
                tfparams,
                to_lstm_h,
                options,
                prefix='ff_logit_to',
                activ='linear')  # (1,512)+((1,512)*(512,512)) = (1,512)
            logit += ctxs_beta
        logit = utils.tanh(logit)  # (1,512)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise)
        # (1,n_words)
        logit = self.layers.get_layer('ff')[1](
            tfparams, logit, options, prefix='ff_logit',
            activ='linear')  # (1,512)*(512,vocab_size) = (1,vocab_size)
        next_probs = tf.nn.softmax(logit)
        # next_sample = trng.multinomial(pvals=next_probs).argmax(1)    # INCOMPLETE , DOUBT : why is multinomial needed?
        next_sample = tf.multinomial(
            next_probs, 1)  # draw samples with given probabilities (1,1)
        next_sample_shape = tf.shape(next_sample)
        next_sample = tf.reshape(next_sample, [next_sample_shape[0]])
        # next word probability
        print 'building f_next...',
        f_next = [next_probs, next_sample] + next_state + next_memory
        print 'done'
        return f_init, f_next
Example #35
    def build_sampler(self, tparams, options, use_noise, trng, mode=None):
        # context: #annotations x dim
        ctx0 = tensor.matrix('ctx_sampler', dtype='float32')
        # ctx0.tag.test_value = numpy.random.uniform(size=(50,1024)).astype('float32')
        ctx_mask = tensor.vector('ctx_mask', dtype='float32')
        # ctx_mask.tag.test_value = numpy.random.binomial(n=1,p=0.5,size=(50,)).astype('float32')
        ctx0_c = tensor.matrix('ctx_sampler_c', dtype='float32')
        # ctx0.tag.test_value = numpy.random.uniform(size=(50,1024)).astype('float32')
        ctx_mask_c = tensor.vector('ctx_mask_c', dtype='float32')

        ctx_ = ctx0
        counts = ctx_mask.sum(-1)

        ctx = ctx_
        ctx_mean = ctx.sum(0) / counts

        ctx_c_ = ctx0_c
        counts_c = ctx_mask_c.sum(-1)

        ctx_c = ctx_c_
        ctx_mean_c = ctx_c.sum(0) / counts_c

        # ctx_mean = ctx.mean(0)
        ctx = ctx.dimshuffle('x', 0, 1)
        # initial state/cell
        bo_init_state = self.layers.get_layer('ff')[1](tparams,
                                                       ctx_mean,
                                                       options,
                                                       prefix='ff_state',
                                                       activ='tanh')
        bo_init_memory = self.layers.get_layer('ff')[1](tparams,
                                                        ctx_mean,
                                                        options,
                                                        prefix='ff_memory',
                                                        activ='tanh')

        bo_init_state_c = self.layers.get_layer('ff')[1](tparams,
                                                         ctx_mean_c,
                                                         options,
                                                         prefix='ff_state_c',
                                                         activ='tanh')
        bo_init_memory_c = self.layers.get_layer('ff')[1](tparams,
                                                          ctx_mean_c,
                                                          options,
                                                          prefix='ff_memory_c',
                                                          activ='tanh')

        bo_init_state += bo_init_state_c
        bo_init_memory += bo_init_memory_c

        to_init_state = tensor.alloc(0., options['dim'])
        to_init_memory = tensor.alloc(0., options['dim'])
        init_state = [bo_init_state, to_init_state]
        init_memory = [bo_init_memory, to_init_memory]

        print 'Building f_init...',
        f_init = theano.function([ctx0, ctx_mask, ctx0_c, ctx_mask_c],
                                 [ctx0] + init_state + init_memory,
                                 name='f_init',
                                 on_unused_input='ignore',
                                 profile=False,
                                 mode=mode)
        print 'Done'

        x = tensor.vector('x_sampler', dtype='int64')
        init_state = [
            tensor.matrix('bo_init_state', dtype='float32'),
            tensor.matrix('to_init_state', dtype='float32')
        ]
        init_memory = [
            tensor.matrix('bo_init_memory', dtype='float32'),
            tensor.matrix('to_init_memory', dtype='float32')
        ]

        # if it's the first word, emb should be all zero
        emb = tensor.switch(x[:, None] < 0,
                            tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                            tparams['Wemb'][x])

        bo_lstm = self.layers.get_layer('lstm_cond')[1](
            tparams,
            emb,
            options,
            prefix='bo_lstm',
            mask=None,
            context=ctx,
            context_c=ctx_c,
            one_step=True,
            init_state=init_state[0],
            init_memory=init_memory[0],
            trng=trng,
            use_noise=use_noise,
            mode=mode)
        to_lstm = self.layers.get_layer('lstm')[1](tparams,
                                                   bo_lstm[0],
                                                   mask=None,
                                                   one_step=True,
                                                   init_state=init_state[1],
                                                   init_memory=init_memory[1],
                                                   prefix='to_lstm')
        next_state = [bo_lstm[0], to_lstm[0]]
        next_memory = [bo_lstm[1], to_lstm[0]]

        bo_lstm_h = bo_lstm[0]
        to_lstm_h = to_lstm[0]
        alphas = bo_lstm[2]
        alphas_c = bo_lstm[3]
        ctxs = bo_lstm[4]
        ctxs_c = bo_lstm[5]
        weight = bo_lstm[6]
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise, trng)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise, trng)

        logit = self.layers.get_layer('ff')[1](tparams,
                                               bo_lstm_h,
                                               options,
                                               prefix='ff_logit_bo',
                                               activ='linear')
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            betas = weight[:, 2]
            # betas = betas.reshape([betas.shape[1],betas.shape[2]])
            to_lstm_h *= betas[:, None]
            ctxs_beta = self.layers.get_layer('ff')[1](tparams,
                                                       ctxs,
                                                       options,
                                                       prefix='ff_logit_ctx',
                                                       activ='linear')
            ctxs_beta_c = self.layers.get_layer('ff')[1](
                tparams,
                ctxs_c,
                options,
                prefix='ff_logit_ctx_c',
                activ='linear')
            to_lstm_h = self.layers.get_layer('ff')[1](tparams,
                                                       to_lstm_h,
                                                       options,
                                                       prefix='ff_logit_to',
                                                       activ='linear')
            logit = logit + ctxs_beta + ctxs_beta_c + to_lstm_h
        logit = utils.tanh(logit)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise, trng)

        logit = self.layers.get_layer('ff')[1](tparams,
                                               logit,
                                               options,
                                               prefix='ff_logit',
                                               activ='linear')
        logit_shp = logit.shape
        next_probs = tensor.nnet.softmax(logit)
        next_sample = trng.multinomial(pvals=next_probs).argmax(1)

        # next word probability
        print 'building f_next...'
        f_next = theano.function(
            [x, ctx0, ctx_mask, ctx0_c, ctx_mask_c] + init_state + init_memory,
            [next_probs, next_sample] + next_state + next_memory,
            name='f_next',
            profile=False,
            mode=mode,
            on_unused_input='ignore')
        print 'Done'
        return f_init, f_next
Example #36
    def build_model(self, tfparams, options, x, mask, ctx, ctx_mask):
        use_noise = tf.Variable(False,
                                dtype=tf.bool,
                                trainable=False,
                                name="use_noise")
        x_shape = tf.shape(x)
        n_timesteps = x_shape[0]
        n_samples = x_shape[1]
        # get word embeddings
        emb = tf.nn.embedding_lookup(
            tfparams['Wemb'], x,
            name="inputs_emb_lookup")  # (num_steps,64,512)
        emb_shape = tf.shape(emb)
        indices = tf.expand_dims(tf.range(1, emb_shape[0]), axis=1)
        emb_shifted = tf.scatter_nd(indices, emb[:-1], emb_shape)
        emb = emb_shifted

        # count num_frames==28
        with tf.name_scope("ctx_mean"):
            with tf.name_scope("counts"):
                counts = tf.expand_dims(
                    tf.reduce_sum(ctx_mask,
                                  axis=-1,
                                  name="reduce_sum_ctx_mask"), 1)  # (64,1)
            ctx_ = ctx
            ctx0 = ctx_  # (64,28,2048)
            ctx_mean = tf.reduce_sum(
                ctx0, axis=1, name="reduce_sum_ctx"
            ) / counts  #mean pooling of {vi}   # (64,2048)

        # initial state/cell
        with tf.name_scope("init_state"):
            init_state = self.layers.get_layer('ff')[1](
                tfparams, ctx_mean, options, prefix='ff_state',
                activ='tanh')  # (64,512)

        with tf.name_scope("init_memory"):
            init_memory = self.layers.get_layer('ff')[1](
                tfparams, ctx_mean, options, prefix='ff_memory',
                activ='tanh')  # (64,512)

        # hstltm = self.layers.build_hlstm(['bo_lstm','to_lstm'], inputs, n_timesteps, init_state, init_memory)
        with tf.name_scope("bo_lstm"):
            bo_lstm = self.layers.get_layer('lstm_cond')[1](
                tfparams,
                emb,
                options,
                prefix='bo_lstm',
                mask=mask,
                context=ctx0,
                context_mean=ctx_mean,
                one_step=False,
                init_state=init_state,
                init_memory=init_memory,
                use_noise=use_noise)
        with tf.name_scope("to_lstm"):
            to_lstm = self.layers.get_layer('lstm')[1](tfparams,
                                                       bo_lstm[0],
                                                       mask=mask,
                                                       one_step=False,
                                                       prefix='to_lstm')
        bo_lstm_h = bo_lstm[0]  # (t,64,512)
        to_lstm_h = to_lstm[0]  # (t,64,512)
        alphas = bo_lstm[2]  # (t,64,28)
        ctxs = bo_lstm[3]  # (t,64,2048)
        betas = bo_lstm[4]  # (t,64,)
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise)
        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](
            tfparams, bo_lstm_h, options, prefix='ff_logit_bo',
            activ='linear')  # (t,64,512)*(512,512) = (t,64,512)
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            to_lstm_h *= (1 - betas[:, :, None])  # (t,64,512)*(t,64,1)
            ctxs_beta = self.layers.get_layer('ff')[1](
                tfparams, ctxs, options, prefix='ff_logit_ctx',
                activ='linear')  # (t,64,2048)*(2048,512) = (t,64,512)
            ctxs_beta += self.layers.get_layer('ff')[1](
                tfparams,
                to_lstm_h,
                options,
                prefix='ff_logit_to',
                activ='linear'
            )  # (t,64,512)+((t,64,512)*(512,512)) = (t,64,512)
            logit += ctxs_beta
        logit = utils.tanh(logit)  # (t,64,512)
        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise)
        # (t,m,n_words)
        logit = self.layers.get_layer('ff')[1](
            tfparams, logit, options, prefix='ff_logit',
            activ='linear')  # (t,64,512)*(512,vocab_size) = (t,64,vocab_size)
        logit_shape = tf.shape(logit)
        # (t*m, n_words)
        probs = tf.nn.softmax(
            tf.reshape(logit,
                       [logit_shape[0] * logit_shape[1], logit_shape[2]
                        ]))  # (t*64, vocab_size)
        # cost
        x_flat = tf.reshape(x, [x_shape[0] * x_shape[1]])  # (t*m,)
        x_flat_shape = tf.shape(x_flat)
        gather_indices = tf.stack([tf.range(x_flat_shape[0]), x_flat],
                                  axis=1)  # (t*m,2)
        cost = -tf.log(
            tf.gather_nd(probs, gather_indices) +
            1e-8)  # (t*m,) : pick probs of each word in each timestep
        cost = tf.reshape(cost, [x_shape[0], x_shape[1]])  # (t,m)
        cost = tf.reduce_sum(
            (cost * mask), axis=0
        )  # (m,) : sum across all timesteps for each element in batch
        extra = [probs, alphas, betas]
        return use_noise, cost, extra
Example #37
    def build_model(self, tparams, options):
        trng = RandomStreams(1234)
        use_noise = theano.shared(numpy.float32(0.))
        # description string: #words x #samples
        x = tensor.matrix('x', dtype='int64')
        mask = tensor.matrix('mask', dtype='float32')
        # context: #samples x #annotations x dim
        ctx = tensor.tensor3('ctx', dtype='float32')
        mask_ctx = tensor.matrix('mask_ctx', dtype='float32')
        ctx_c = tensor.tensor3('ctx_c', dtype='float32')
        mask_ctx_c = tensor.matrix('mask_ctx_c', dtype='float32')
        n_timesteps = x.shape[0]
        n_samples = x.shape[1]

        # index into the word embedding matrix, shift it forward in time
        emb = tparams['Wemb'][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])
        emb_shifted = tensor.zeros_like(emb)
        emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
        emb = emb_shifted
        counts = mask_ctx.sum(-1).dimshuffle(0, 'x')

        ctx_ = ctx
        ctx_c_ = ctx_c

        ctx0 = ctx_
        ctx_mean = ctx0.sum(1) / counts

        ctx0_c = ctx_c_
        ctx_mean_c = ctx0_c.sum(1) / counts

        # initial state/cell
        init_state = self.layers.get_layer('ff')[1](tparams,
                                                    ctx_mean,
                                                    options,
                                                    prefix='ff_state',
                                                    activ='tanh')
        init_memory = self.layers.get_layer('ff')[1](tparams,
                                                     ctx_mean,
                                                     options,
                                                     prefix='ff_memory',
                                                     activ='tanh')
        init_state_c = self.layers.get_layer('ff')[1](tparams,
                                                      ctx_mean_c,
                                                      options,
                                                      prefix='ff_state_c',
                                                      activ='tanh')
        init_memory_c = self.layers.get_layer('ff')[1](tparams,
                                                       ctx_mean_c,
                                                       options,
                                                       prefix='ff_memory_c',
                                                       activ='tanh')

        init_state += init_state_c
        init_memory += init_memory_c

        # decoder
        bo_lstm = self.layers.get_layer('lstm_cond')[1](
            tparams,
            emb,
            options,
            prefix='bo_lstm',
            mask=mask,
            context=ctx0,
            context_c=ctx0_c,
            one_step=False,
            init_state=init_state,
            init_memory=init_memory,
            trng=trng,
            use_noise=use_noise)
        to_lstm = self.layers.get_layer('lstm')[1](tparams,
                                                   bo_lstm[0],
                                                   mask=mask,
                                                   one_step=False,
                                                   prefix='to_lstm')

        bo_lstm_h = bo_lstm[0]
        to_lstm_h = to_lstm[0]
        alphas = bo_lstm[2]
        alphas_c = bo_lstm[3]
        ctxs = bo_lstm[4]
        ctxs_c = bo_lstm[5]
        weight = bo_lstm[6]
        if options['use_dropout']:
            bo_lstm_h = self.layers.dropout_layer(bo_lstm_h, use_noise, trng)
            to_lstm_h = self.layers.dropout_layer(to_lstm_h, use_noise, trng)

        # compute word probabilities
        logit = self.layers.get_layer('ff')[1](tparams,
                                               bo_lstm_h,
                                               options,
                                               prefix='ff_logit_bo',
                                               activ='linear')
        if options['prev2out']:
            logit += emb
        if options['ctx2out']:
            betas = weight[:, :, 2]
            #betas = betas.reshape([betas.shape[1],betas.shape[2]])
            to_lstm_h *= betas[:, :, None]
            ctxs_beta = self.layers.get_layer('ff')[1](tparams,
                                                       ctxs,
                                                       options,
                                                       prefix='ff_logit_ctx',
                                                       activ='linear')
            ctxs_beta_c = self.layers.get_layer('ff')[1](
                tparams,
                ctxs_c,
                options,
                prefix='ff_logit_ctx_c',
                activ='linear')
            to_lstm_h = self.layers.get_layer('ff')[1](tparams,
                                                       to_lstm_h,
                                                       options,
                                                       prefix='ff_logit_to',
                                                       activ='linear')
            logit = logit + ctxs_beta + ctxs_beta_c + to_lstm_h
        logit = utils.tanh(logit)

        if options['use_dropout']:
            logit = self.layers.dropout_layer(logit, use_noise, trng)

        # (t,m,n_words)
        logit = self.layers.get_layer('ff')[1](tparams,
                                               logit,
                                               options,
                                               prefix='ff_logit',
                                               activ='linear')
        logit_shp = logit.shape
        # (t*m, n_words)
        probs = tensor.nnet.softmax(
            logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))
        # cost
        x_flat = x.flatten()  # (t*m,)
        cost = -tensor.log(probs[tensor.arange(x_flat.shape[0]), x_flat] +
                           1e-8)

        cost = cost.reshape([x.shape[0], x.shape[1]])
        cost = (cost * mask).sum(0)
        extra = [probs, alphas, alphas_c, weight[:, :, 0], weight[:, :, 1]]

        return trng, use_noise, x, mask, ctx, mask_ctx, ctx_c, mask_ctx_c, cost, extra
Example #38
    def get_next_step(self, cur, prevtime=0):
        """
        Purpose: given the current node `cur` that the random walk has reached
        (the two connected nodes may share multiple edges), determine the next step.
        Output:
        #return J, q
        Directly returns the next node, together with its timestamp.
        """
        G = self.G
        
        tmp_key = []
        tmp_node = []
        tmp_time = []
        unnormalized_probs_t = []
        unnormalized_probs_a = []

        cur_nbrs = list(G.neighbors(cur))
        if self.time_biased_type == "simple_graph": #DeepWalk
            for nbr in cur_nbrs:
                tmp_node.append(nbr)
                unnormalized_probs_t.append(1)                
                
            if len(unnormalized_probs_t) > 0:
                idx = weight_choice(unnormalized_probs_t)
                next_node = tmp_node[idx]
                next_time = 0 
                next_key = 0
                return next_node, next_time, next_key
            else:
                return None, None, None  # no eligible candidates
        else:    
            for nbr in cur_nbrs:
                nbr_key = list(G.get_edge_data(cur, nbr))    # array of edge keys between cur and nbr
                for k in nbr_key:
                    t = k
                    a = G[cur][nbr][k]['weight']
                    if self.time_biased_type == "no_time_limit":
                        unnormalized_probs_t.append(1)
                    
                    elif t >= prevtime:
                        unnormalized_probs_a.append(a)
                        
                        if self.time_biased_type == "time_uniform"  :
                            unnormalized_probs_t.append(1)
                        elif self.time_biased_type == "time_close_raw"  :
                            unnormalized_probs_t.append( self.max_time - t + 1 )
                        elif self.time_biased_type == "time_close_exp"  :
                            unnormalized_probs_t.append( t - prevtime )
                        else:
                            unnormalized_probs_t.append( t - prevtime + 1 )
                        tmp_time.append(t)
                        tmp_node.append(nbr)
                        tmp_key.append(k)
                        

            if self.time_biased_type == "time_close_linear" :
                unnormalized_probs_t = linear_rank_mapping( unnormalized_probs_t, order='descending' )
            elif self.time_biased_type == "time_far_linear" :
                unnormalized_probs_t = linear_rank_mapping( unnormalized_probs_t)   
            elif self.time_biased_type == "time_freq_tanh":
                unnormalized_probs_t = tanh(unnormalized_probs_t)
            elif self.time_biased_type == "time_close_exp":
                unnormalized_probs_t = softmax(unnormalized_probs_t)

            if self.amount_biased == "amount_linear":
                unnormalized_probs_a = linear_rank_mapping(unnormalized_probs_a)
            elif self.amount_biased == "amount_tanh":
                unnormalized_probs_a = tanh(unnormalized_probs_a)
            elif self.amount_biased == "amount_exp":
                unnormalized_probs_a = softmax(unnormalized_probs_a)

            
            if len(unnormalized_probs_t) > 0:  # there is an eligible next node
                if self.amount_biased != "amount_uniform":
                    unnormalized_probs = combine_probs(unnormalized_probs_t, unnormalized_probs_a, self.alpha)        
                else:
                    unnormalized_probs = unnormalized_probs_t
                    
                selected = weight_choice(unnormalized_probs)               
                next_node = tmp_node[selected]  
                next_time = tmp_time[selected]        
                next_key = tmp_key[selected]   
                return next_node, next_time, next_key  
            
            else:
                return None, None, None  # no eligible candidates
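get_next_step leans on a weight_choice helper that is not shown. A hedged sketch of what it is assumed to do, i.e. draw an index with probability proportional to the unnormalized weights:

import random

def weight_choice(weights):
    total = float(sum(weights))
    r = random.uniform(0.0, total)
    acc = 0.0
    for idx, w in enumerate(weights):
        acc += w
        if acc >= r:
            return idx
    return len(weights) - 1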