Example #1
    def rnn_output(self, y_prev, h_prev):
        # new hidden state
        h_t = T.tanh(T.dot(self.Wh, h_prev))
        # compute new out_label
        y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
        out_label = T.argmax(y_hat_t)

        return (out_label, h_t), scan_module.until(T.eq(out_label, self.out_end))
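Every step function on this page follows the same contract: it returns its per-step outputs together with a `scan_module.until(...)` condition, and `theano.scan` stops iterating as soon as that condition becomes true. A minimal sketch of how `rnn_output` might be driven (the initial token `y0`, initial hidden state `h0`, and the cap `max_len` are illustrative assumptions, not part of the original code):

# Sketch only: y0, h0 and max_len are hypothetical placeholders.
(labels, hiddens), updates = theano.scan(
    fn=self.rnn_output,
    outputs_info=[y0, h0],   # fed back into the step as y_prev and h_prev
    n_steps=max_len,         # hard cap; until() can end the loop earlier
)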
Example #2
    def lstm_output(self, y_prev, ch_prev):
        """calculates info to pass to next time step.
        ch_prev is a vector of size 2*hdim"""

        c_prev = ch_prev[:self.hdim]  #T.vector('c_prev')
        h_prev = ch_prev[self.hdim:]  #T.vector('h_prev')

        # gates (input, forget, output)
        i_t = sigmoid(T.dot(self.Ui, h_prev))
        f_t = sigmoid(T.dot(self.Uf, h_prev))
        o_t = sigmoid(T.dot(self.Uo, h_prev))
        # new memory cell
        c_new_t = T.tanh(T.dot(self.Uc, h_prev))
        # final memory cell
        c_t = f_t * c_prev + i_t * c_new_t
        # final hidden state
        h_t = o_t * T.tanh(c_t)

        # Input vector for softmax
        theta_t = T.dot(self.U, h_t) + self.b
        # Softmax prob vector
        y_hat_t = softmax(theta_t.T).T
        # softmax returns a 2-d row rather than a 1-d vector,
        # hence the transposes around the call above
        # y_hat_t = y_hat_t[0]
        # compute the new output label
        out_label = T.argmax(y_hat_t)

        # final joint state
        ch_t = T.concatenate([c_t, h_t])

        return (out_label,
                ch_t), scan_module.until(T.eq(out_label, self.out_end))
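Because scan feeds each output back as a single recurrent argument, the cell state and the hidden state are packed into one vector of length 2*hdim, and the initial state has to be packed the same way (`c0`, `h0`, `y0` and `max_len` below are illustrative assumptions):

# Sketch only: c0, h0, y0 and max_len are hypothetical placeholders.
ch0 = T.concatenate([c0, h0])   # packed initial state, shape (2 * hdim,)
(labels, chs), updates = theano.scan(
    fn=self.lstm_output,
    outputs_info=[y0, ch0],
    n_steps=max_len,
)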
Example #5
def ssaStep(vectFunc, x, t, tfinal, selection, mutation, S, reactions):
    # propensity of each reaction in the current state
    v = vectFunc(x, selection, mutation, S)
    a0 = tt.sum(v)
    # exponential waiting time until the next reaction fires
    r1 = shared(np.random.rand())
    tau = 1 / a0 * tt.log(1 / r1)
    # choose a reaction with probability proportional to its propensity
    prob = v / a0
    j = tt.raw_random.choice(reactions, p=prob)
    # apply the chosen reaction's stoichiometry and advance time
    x += S[:, j]
    t = t + tau
    return x, t, scan_module.until(t > tfinal)
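This is one iteration of Gillespie's stochastic simulation algorithm: with total propensity a0, tau = (1/a0) * log(1/r1) is an Exponential(a0) waiting time, and reaction j fires with probability v[j] / a0. A plain-NumPy version of the same step, for reference (all names here are illustrative):

import numpy as np

def ssa_step_np(propensity_fn, x, t, S, rng):
    # propensity_fn, S and rng are stand-ins for the symbolic pieces above.
    v = propensity_fn(x)                      # per-reaction propensities
    a0 = v.sum()
    tau = np.log(1.0 / rng.random()) / a0     # Exponential(a0) waiting time
    j = rng.choice(len(v), p=v / a0)          # reaction picked by propensity
    return x + S[:, j], t + tau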
Example #6
    def new_output(self, y_prev, h_prev):
        # gates (update, reset)
        z_t = sigmoid(T.dot(self.Uz, h_prev))
        r_t = sigmoid(T.dot(self.Ur, h_prev))
        # combine them
        h_new_t = T.tanh(r_t * T.dot(self.Uh, h_prev))
        h_t = z_t * h_prev + (1 - z_t) * h_new_t
        # compute new out_label
        y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
        out_label = T.argmax(y_hat_t)

        return (out_label, h_t), scan_module.until(T.eq(out_label, self.out_end))
Example #7
    def _step(
            i,
            pkm1, pkm2, qkm1, qkm2,
            k1, k2, k3, k4, k5, k6, k7, k8, r
    ):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = tt.switch(tt.eq(qk, zero), r, pk/qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG)
        biginv_cond = tt.or_(
            tt.lt(tt.abs_(qk), BIGINV),
            tt.lt(tt.abs_(pk), BIGINV)
        )

        pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = tt.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = tt.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = tt.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = tt.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1)

        return ((pkm1, pkm2, qkm1, qkm2,
                 k1, k2, k3, k4, k5, k6, k7, k8, r),
                until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))))
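Here `until()` acts as a convergence test for a continued-fraction evaluation: the loop exits once the ratio `r` is stable to within `THRESH`, while the BIG/BIGINV rescaling keeps the partial numerators and denominators from overflowing or underflowing. Driven by scan, `n_steps` supplies a hard iteration cap and `until()` the early exit; a sketch, assuming the surrounding code sets up the initial taps and a cap of 200 iterations:

    # Sketch only: the initial values and the 200-iteration cap are assumptions.
    outputs, updates = theano.scan(
        _step,
        sequences=[tt.arange(1, 201)],   # i = 1 .. 200
        outputs_info=[pkm1, pkm2, qkm1, qkm2,
                      k1, k2, k3, k4, k5, k6, k7, k8, r],
    )
    result = outputs[-1][-1]             # last value of r after convergence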
Example #8
    def model_layers_predict(self, x, *args):
        """
        Defines the model for all layers.
        :param x: input
        :param args: list of inputs (from the previous time step) for the LSTMs
        """
        args = list(args)
        stop_condition = args[-1]
        args = args[:-1]
        outputs = list(self.model_layers(x, *args))
        # out = outputs[0]
        # outputs[0] = T.concatenate([T.round(out[:-3]), [out[-3]], T.round(out[-2:-1]), [out[-1]]])
        # cond = T.eq(T.argmax(x), T.argmax(stop_condition))
        cond = self.predict_stopping_condition(outputs[0], stop_condition)
        return tuple(outputs), scan_module.until(cond)  # x (output), vals_t, ..., stopping condition
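`predict_stopping_condition` itself is not shown; judging from the commented-out line above it, a plausible implementation compares the most likely predicted symbol to the designated stop symbol (an assumption, not the repository's actual code):

    def predict_stopping_condition(self, output, stop_condition):
        # Assumed behaviour: stop once the predicted symbol is the stop symbol.
        return T.eq(T.argmax(output), T.argmax(stop_condition))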
Example #9
    def step(ord):
        # highest-scoring box still in the running
        i = ord[0]

        # intersection of box i with every remaining box
        xx1 = T.maximum(x1[i], x1[ord[1:]])
        yy1 = T.maximum(y1[i], y1[ord[1:]])
        xx2 = T.minimum(x2[i], x2[ord[1:]])
        yy2 = T.minimum(y2[i], y2[ord[1:]])

        w = T.maximum(0.0, xx2 - xx1 + 1)
        h = T.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        # intersection-over-union with box i
        ovr = inter / (areas[i] + areas[ord[1:]] - inter)

        # keep only boxes that overlap box i by at most thresh
        inds = T.le(ovr, thresh).nonzero()[0]
        ord = ord[inds + 1]

        # stop once no candidate boxes remain
        return (i, ord), until(T.eq(ord.size, 0))
Example #11
    def find_perturb(perturbation):
        logits_os = model(inputs + (1 + over_shoot) * perturbation)
        y_pred = T.argmax(logits_os, axis=1)
        is_mistake = T.neq(y_pred, labels)
        current_ind = batch_indices[(1 - is_mistake).nonzero()]
        should_stop = T.all(is_mistake)

        # continue generating perturbation only for correctly classified
        inputs_subset = inputs[current_ind]
        perturbation_subset = perturbation[current_ind]
        labels_subset = labels[current_ind]
        batch_subset = T.arange(inputs_subset.shape[0])

        x_adv = inputs_subset + perturbation_subset
        logits = model(x_adv)
        corrects = logits[batch_subset, labels_subset]
        jac = jacobian(logits, x_adv, num_classes)

        # deepfool
        f = logits - T.shape_padright(corrects)
        w = jac - T.shape_padaxis(jac[batch_subset, labels_subset], axis=1)
        reduce_ind = range(2, inputs.ndim + 1)
        if norm == 'l2':
            dist = T.abs_(f) / w.norm(2, axis=reduce_ind)
        else:
            dist = T.abs_(f) / T.sum(T.abs_(w), axis=reduce_ind)
        # remove correct targets
        dist = T.set_subtensor(dist[batch_subset, labels_subset],
                               T.constant(np.inf))
        l = T.argmin(dist, axis=1)
        dist_l = dist[batch_subset, l].dimshuffle(0, 'x', 'x', 'x')
        # avoid numerical instability and clip max value
        if clip_dist is not None:
            dist_l = T.clip(dist_l, 0, clip_dist)
        w_l = w[batch_subset, l]
        if norm == 'l2':
            reduce_ind = range(1, inputs.ndim)
            perturbation_upd = dist_l * w_l / w_l.norm(
                2, reduce_ind, keepdims=True)
        else:
            perturbation_upd = dist_l * T.sgn(w_l)
        perturbation = ifelse(
            should_stop, perturbation,
            T.inc_subtensor(perturbation[current_ind], perturbation_upd))
        return perturbation, scan_module.until(should_stop)
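A DeepFool-style loop like this one is typically unrolled with scan over a single carried perturbation tensor, taking the last slice afterwards (`max_iter` is an illustrative assumption):

    # Sketch only: max_iter is a hypothetical iteration cap.
    perturbations, updates = theano.scan(
        find_perturb,
        outputs_info=T.zeros_like(inputs),   # start from a zero perturbation
        n_steps=max_iter,                    # until() can stop the loop earlier
    )
    final_perturbation = perturbations[-1]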
Example #12
    def _step(x_, h_, c_, is_complete_, n_samples):
      x_and_h = tensor.concatenate([x_, h_], axis=1)
      preact = tensor.dot(x_and_h, tparams["WLSTM"]) + tparams["bLSTM"]

      i = tensor.nnet.sigmoid(_lstm_slice(preact, 0, hidden_size))
      f = tensor.nnet.sigmoid(_lstm_slice(preact, 1, hidden_size))
      o = tensor.nnet.sigmoid(_lstm_slice(preact, 2, hidden_size))
      c = tensor.tanh(_lstm_slice(preact, 3, hidden_size))

      c = f * c_ + i * c
      h = o * tensor.tanh(c)
      
      decoder = tensor.dot(h, tparams['Wd']) + tparams['bd']
      softmax = tensor.nnet.softmax(decoder)
      predicted_prob, predicted_idx = tensor.max_and_argmax(softmax, axis=1)
      predicted_word_vector = tparams['Ws'][predicted_idx]
      
      is_end_reached = predicted_idx <= 0
      is_complete_ = is_complete_ + is_end_reached
      is_complete_sum = tensor.sum(is_complete_)
      
      return (predicted_word_vector, h, c, is_complete_, predicted_idx, predicted_prob), scan_module.until(tensor.eq(is_complete_sum, n_samples))
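`_lstm_slice` is not part of the excerpt; the conventional helper for cutting the fused gate pre-activation into its four blocks looks like this (an assumption based on the usual LSTM layout, not the repository's code):

    def _lstm_slice(preact, n, dim):
        # n-th dim-wide block of the fused pre-activation (gates i, f, o, c)
        return preact[:, n * dim:(n + 1) * dim]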
Example #13
    def _step(i, t, s):
        # next series term and running partial sum
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        # stop once the new term is negligible
        return ((t, s), until(tt.abs_(step) < threshold))
Example #15
def myFunc(val, preval):
    # remember the previous count, then bump it when val exceeds 0.5
    val1 = preval
    preval = ifelse(T.gt(val, 0.5), preval + 1, preval)
    # stop as soon as the count stops growing (first val <= 0.5)
    return preval, scan_module.until(T.eq(val1, preval))
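This is the simplest example on the page and runs end to end; a self-contained driver might look like this (the input values are made up for illustration):

import numpy as np
import theano
import theano.tensor as T
from theano import scan_module
from theano.ifelse import ifelse

vals = T.vector('vals')
counts, updates = theano.scan(
    myFunc,
    sequences=vals,
    outputs_info=np.int64(0),   # initial count
)
count_leading = theano.function([vals], counts[-1], updates=updates)
# Counts the leading run of values > 0.5, then stops: prints 2.
print(count_leading(np.array([0.9, 0.7, 0.3, 0.8],
                             dtype=theano.config.floatX)))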
Example #16
File: ncg.py Project: delallea/ncg
    def fmin_cg_loop(old_fval, old_old_fval, *rest):
        xks  = rest[:n_elems]
        gfks = rest[n_elems:n_elems * 2]

        maxs = [ abs(gfk).max(axis=range(gfk.ndim)) for gfk in gfks ]
        if len(maxs) == 1:
            gnorm = maxs[0]
        else:
            gnorm = TT.maximum(maxs[0], maxs[1])
            for dx in maxs[2:]:
                gnorm = TT.maximum(gnorm, dx)

        pks  = rest[n_elems*2:]
        deltak = sum((gfk * gfk).sum() for gfk in gfks)

        old_fval_backup = old_fval
        old_old_fval_backup = old_old_fval

        alpha_k, old_fval, old_old_fval, derphi0, nw_gfks = \
                linesearch.line_search_wolfe2(f, myfprime, xks, pks,
                                              old_fval_backup,
                                              old_old_fval_backup,
                                              profile=profile,
                                              gfks=gfks)

        xks = [ifelse(gnorm <= gtol, xk,
                      ifelse(TT.bitwise_or(TT.isnan(alpha_k),
                                           TT.eq(alpha_k, zero)),
                             xk,
                             xk + alpha_k * pk))
               for xk, pk in zip(xks, pks)]
        gfkp1s_tmp = myfprime(*xks)
        gfkp1s = [ifelse(TT.isnan(derphi0), nw_x, x)
                  for nw_x, x in zip(gfkp1s_tmp, nw_gfks)]

        yks = [gfkp1 - gfk for gfkp1, gfk in izip(gfkp1s, gfks)]
        # Polak-Ribiere formula.
        beta_k = TT.maximum(
                zero,
                sum((x * y).sum() for x, y in izip(yks, gfkp1s)) / deltak)
        pks = [ifelse(gnorm <= gtol, pk,
                      ifelse(TT.bitwise_or(TT.isnan(alpha_k),
                                           TT.eq(alpha_k, zero)),
                             pk,
                             -gfkp1 + beta_k * pk))
               for gfkp1, pk in zip(gfkp1s, pks)]
        gfks = [ifelse(gnorm <= gtol,
                       gfk,
                       ifelse(
                           TT.bitwise_or(TT.isnan(alpha_k),
                                         TT.eq(alpha_k, zero)),
                           gfk,
                           gfkp1))
                for (gfk, gfkp1) in izip(gfks, gfkp1s)]

        stop = lazy_or(gnorm <= gtol,
                       TT.bitwise_or(TT.isnan(alpha_k),
                                     TT.eq(alpha_k, zero)))  # warnflag = 2
        old_fval = ifelse(gnorm > gtol, old_fval, old_fval_backup)
        old_old_fval = ifelse(gnorm > gtol, old_old_fval, old_old_fval_backup)
        return ([old_fval, old_old_fval]+xks + gfks + pks,
                until(stop))