Example #1
   def forward(self):
      """
      Perform a forward step - activate the net input using logistic function
      """

      # Perform the activation: (1 - e^-net) / (1 + e^-net), which equals tanh(net/2)
      self.output.setOutput((1.0 - gpu.exp(-self.input.getNetInput())) / (1.0 + gpu.exp(-self.input.getNetInput())))
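A minimal NumPy check (independent of the gpu module used above) of the identity behind this activation:

import numpy as np

x = np.linspace(-5.0, 5.0, 11)
lhs = (1.0 - np.exp(-x)) / (1.0 + np.exp(-x))
assert np.allclose(lhs, np.tanh(x / 2.0))   # (1 - e^-x)/(1 + e^-x) == tanh(x/2), not 1/(1 + e^-x)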
Example #2
def nn_forward_pass(x, w, b, return_all=True):
    """
    Forward pass for multilayer feed-forward sigmoid neural network
    
    Hidden units have sigmoid non-linearity. 
    Output is soft-max.

    x: DxN matrix of input data
    w: Weights. List of weight matrices for each layer.
    b: Biases. List of bias vectors for each layer
    return_all: If True, returns hidden unit activations for each layer. If False,
        returns just the output layer activations.
    Returns a list h where each element is a matrix containing the activations
    for that layer. h[0] is input data x. 
    """
    # ---- TEMP HACK --------------
    # I should find a more seamless way of running in mixed (some operations
    # with numpy, some with gnumpy) mode.
    # I had to resort to this because I needed the validation classification
    # step in nn_train to run on CPU with numpy. GPU ran out of memory.
    if isinstance(x, gnp.garray):
        use_gpu = True
    else:
        use_gpu = False

    layer_count = len(w)
    if return_all:
        hs = [x]  # unit activations for each layer
    h = x

    # all layers except the output layer
    for l in range(layer_count - 1):
        if use_gpu:
            a = gnp.dot(w[l].T, h) + b[l]
            h = gnp.logistic(a)
        else:
            a = np.dot(gnp.as_numpy_array(w[l]).T, h) + gnp.as_numpy_array(b[l])
            h = 1.0 / (1 + np.exp(-a))
        if return_all:
            hs.append(h)

    # output layer
    if use_gpu:
        h = gnp.dot(w[-1].T, h) + b[-1]
        h = gnp.exp(h) / gnp.sum(gnp.exp(h), axis=0)  # soft-max
    else:
        h = np.dot(gnp.as_numpy_array(w[-1]).T, h) + gnp.as_numpy_array(b[-1])
        h = np.exp(h) / np.sum(np.exp(h), axis=0)  # soft-max

    if return_all:
        hs.append(h)
        return hs
    else:
        return h
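A NumPy-only sketch of the same forward-pass arithmetic (sigmoid hidden layer, column-wise soft-max output); the dimensions and random weights below are illustrative, not taken from the original code, and the soft-max here subtracts the column max for stability:

import numpy as np

rng = np.random.RandomState(0)
D, H, K, N = 4, 5, 3, 10                                 # input dim, hidden dim, classes, samples
x = rng.randn(D, N)                                      # DxN input, as in nn_forward_pass
w = [0.1 * rng.randn(D, H), 0.1 * rng.randn(H, K)]       # one hidden layer, one output layer
b = [np.zeros((H, 1)), np.zeros((K, 1))]

h = 1.0 / (1.0 + np.exp(-(np.dot(w[0].T, x) + b[0])))    # sigmoid hidden activations
o = np.dot(w[1].T, h) + b[1]
o = np.exp(o - o.max(axis=0))                            # soft-max over each column
o /= o.sum(axis=0)
assert np.allclose(o.sum(axis=0), 1.0)                   # every column is a distribution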
Example #3
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # deterministic forward (bottom-up) pass: we propagate activations rather than
    # stochastically sampled states
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units in the top-level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # exponentiating the free energies directly can overflow or underflow, so subtract
    # the row-wise max (log-sum-exp trick) before converting them to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
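For reference, the quantity accumulated above is the negative free energy from the deeplearning.net formula cited in the docstring: with v = [y; h_prev], -F(v) = b_v . v + sum_i log(1 + exp(c_i + (W^T v)_i)), where e_v is the visible-bias term and e_h the sum over hidden units; the soft-max at the end then turns the per-class -F values into class probabilities.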
Example #4
def rect_log(x, computeGrad = False):
	if (not computeGrad):
		f = gp.log(x*(x>0)+1)* (x>0)
		return f

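	# note: when computeGrad is True, callers appear to pass the cached forward
	# output f = log(1 + relu(x)); then (f > 0) / exp(f) = 1 / (1 + x_orig), the derivative of f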
	g = (x>0) / (gp.exp(x))
	return g
Example #5
 def softmax(self, x):
     max=gp.max(x,axis=1)
     x=x-max[:,gp.newaxis]
     y=gp.exp(x)
     s=gp.sum(y,1)
     z=y/s[:,gp.newaxis]
     return z
Example #6
    def forward(self, X, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        R = self.R
        C = self.C
        bw = self.bw

        # Obtain word features
        tmp = R.as_numpy_array()[:,X.flatten()].flatten(order='F')  # order='F' flattens in Fortran (column-major) order; the default flatten() is row-major
        tmp = tmp.reshape((batchsize, self.K * self.context))   # reshape(), in row-major order
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i,:,:] = tmp[i,:].reshape((self.K, self.context), order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:,:,i], C[i,:,:])  # dot() on 2-D arrays is equivalent to matrix multiplication
        acts = gpu.concatenate((acts, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(acts, gpu.concatenate((R, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, preds.as_numpy_array())
Example #7
def loss_mclr(Yh, Y):
    """Compute mutinomial logistic regression loss for Yh, w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y must
    be a +1/-1 indicator vector for the target class of a row in Yh.
    """
    obs_count = float(Y.shape[0])
    # Get boolean mask for each observation's target class
    cl_mask = (Y > 0.0)
    # Compute softmax distribution transform of Yh
    sm_sum = gp.sum(gp.exp(Yh), axis=1)
    P = gp.exp(Yh) / sm_sum[:,gp.newaxis]
    dL = (P - cl_mask) / obs_count
    logP = gp.log(P) * cl_mask
    L = -gp.sum(logP) / obs_count
    return {'L': L, 'dL': dL}
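A small NumPy check of the same soft-max / cross-entropy arithmetic on toy data (the values below are made up; gp is replaced by numpy):

import numpy as np

Yh = np.array([[2.0, 1.0, 0.1],
               [0.5, 2.5, 0.2]])               # 2 observations x 3 classes
Y = np.array([[1., -1., -1.],
              [-1., 1., -1.]])                 # +1/-1 indicator rows
cl_mask = Y > 0.0
P = np.exp(Yh) / np.exp(Yh).sum(axis=1)[:, np.newaxis]
L = -np.sum(np.log(P) * cl_mask) / Y.shape[0]  # mean negative log-likelihood
dL = (P - cl_mask) / Y.shape[0]                # gradient with respect to Yh
print(L, dL)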
Example #8
 def getErrorLoss(self, a0, a2,factor=1.0):
     """
     error is measured by negative log-likelihood
     """
     pow=a2**a0
     p=gp.exp(-a2)*pow/self.factor[a0] 
     l=gp.log(p)
     return -l.sum(axis=1).mean()*factor
Example #9
   def forward(self):
      """
      Perform a forward step - activate the net input using the softmax function
      """

      # Perform the activation (softmax: exponentiate, then normalize each row to sum to 1)
      self.output.setOutput(gpu.exp(self.input.getNetInput()))
      self.output.setOutput(self.output.getOutput() / (gpu.garray([gpu.sum(self.output.getOutput(),1)]).transpose()))
Example #10
    def compute_kernel_transformation(self, x_base, x_new):
        x_base = x_base if isinstance(x_base, gnp.garray) else gnp.garray(x_base)
        x_new = x_new if isinstance(x_new, gnp.garray) else gnp.garray(x_new)

        xx = x_new.dot(x_base.T)
        xx_base = (x_base**2).sum(axis=1)
        xx_new = (x_new**2).sum(axis=1)
        return gnp.exp(-1.0 / (2 * self.sigma**2) * (-2 * xx + xx_base + xx_new[:,gnp.newaxis]))
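The kernel above uses the expansion ||a - b||^2 = a.a - 2 a.b + b.b; a small NumPy sketch (made-up data and sigma) confirming that the expanded form matches a direct pairwise RBF computation:

import numpy as np

rng = np.random.RandomState(1)
x_base, x_new, sigma = rng.randn(6, 3), rng.randn(4, 3), 0.7

xx = x_new.dot(x_base.T)
xx_base = (x_base**2).sum(axis=1)
xx_new = (x_new**2).sum(axis=1)
K_expanded = np.exp(-1.0 / (2 * sigma**2) * (-2 * xx + xx_base + xx_new[:, np.newaxis]))

diff = x_new[:, np.newaxis, :] - x_base[np.newaxis, :, :]     # pairwise differences
K_direct = np.exp(-(diff**2).sum(axis=-1) / (2 * sigma**2))
assert np.allclose(K_expanded, K_direct)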
Example #11
 def safe_softmax(self, Y):
     """Compute a reasonably (numerically) safe softmax."""
     Y_max = gp.max(Y, axis=1)
     Y_max = Y_max[:,gp.newaxis]
     Y_exp = gp.exp(Y - Y_max)
     Y_sum = gp.sum(Y_exp, axis=1)
     Y_sum = Y_sum[:,gp.newaxis]
     Y_sm = Y_exp / Y_sum
     return Y_sm
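A NumPy illustration (made-up values) of why the row maximum is subtracted first: the naive soft-max overflows for large logits, while the shifted version stays finite:

import numpy as np

Y = np.array([[1000.0, 1001.0, 1002.0]])
naive = np.exp(Y) / np.exp(Y).sum(axis=1)[:, np.newaxis]        # overflows to inf/inf -> nan
shifted = np.exp(Y - Y.max(axis=1)[:, np.newaxis])
safe = shifted / shifted.sum(axis=1)[:, np.newaxis]
print(naive)                                                    # [[nan nan nan]]
print(safe, safe.sum())                                         # valid distribution, sums to 1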
Example #12
def log_exp_sum_1d(x):
    """
    This computes log(exp(x_1) + exp(x_2) + ... + exp(x_n)) as 
    x* + log(exp(x_1-x*) + exp(x_2-x*) + ... + exp(x_n-x*)), where x* is the
    max over all x_i.  This can avoid numerical problems.
    """
    x_max = x.max()
    if isinstance(x, gnp.garray):
        return x_max + gnp.log(gnp.exp(x - x_max).sum())
    else:
        return x_max + np.log(np.exp(x - x_max).sum())
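A quick NumPy check (toy vector) that the shifted form equals the direct log-sum-exp when the naive computation still fits in floating-point range:

import numpy as np

x = np.array([-2.0, 0.5, 3.0])
x_max = x.max()
lse = x_max + np.log(np.exp(x - x_max).sum())
assert np.isclose(lse, np.log(np.exp(x).sum()))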
Example #13
    def forward(self, X, Im, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        Im = gpu.garray(Im)
        C = self.C
        M = self.M
        bw = self.bw
        J = self.J
        bj = self.bj
        Wfx = self.Wfx
        Whf = self.Whf
        Wfv = self.Wfv

        # Forwardprop images
        Im = gpu.concatenate((Im, gpu.ones((batchsize, 1))), 1)
        IF = gpu.dot(Im, gpu.concatenate((J, bj)))
        IF = IF * (IF > 0)

        # Obtain word features
        R = gpu.dot(Wfx, Whf)
        tmp = R.as_numpy_array()[:,X.flatten()].flatten(order='F')
        tmp = tmp.reshape((batchsize, self.K * self.context))
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i,:,:] = tmp[i,:].reshape((self.K, self.context), order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:,:,i], C[i,:,:])
        acts = acts + gpu.dot(IF, M)

        # Multiplicative interaction
        F = gpu.dot(acts, Wfx) * gpu.dot(IF, Wfv)
        F = gpu.concatenate((F, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(F, gpu.concatenate((Whf, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, IF, F, preds.as_numpy_array())
Example #14
    def costAndGrad(self, data, labels):

        # forward prop
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
        probs = gp.exp(probs)
        probs = probs / gp.sum(probs, axis=0)
        probs += (probs < 1e-8) * (1e-8 - probs)

        labelMat = np.zeros(probs.shape)
        labelMat[labels, range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1. / self.mbSize) * gp.sum(labelMat * gp.log(probs))

        if not self.train:
            return cost, None

        # back prop
        self.deltas[-1] = probs - labelMat
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i + 1], True)
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * grad
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)

            # add gaussian noise
            # self.grad[i][0] += .01 * gp.randn(self.grad[i][0].shape)
            # self.grad[i][1] += .01 * gp.randn(self.grad[i][1].shape)

        return cost, self.grad
Example #15
    def costAndGrad(self,data,labels):
        
        # forward prop
        self.hActs[0] = data
        i = 1
        for w,b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i-1])+b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1]-gp.max(self.hActs[-1],axis=0)
        probs = gp.exp(probs)
        probs = probs/gp.sum(probs,axis=0)
        probs += (probs < 1e-8)*(1e-8-probs)

        labelMat = np.zeros(probs.shape)
        labelMat[labels,range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1./self.mbSize)*gp.sum(labelMat*gp.log(probs))

        if not self.train:
            return cost,None

        # back prop
        self.deltas[-1] = probs-labelMat
        i = len(self.layerSizes)-1
        for w,b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i+1], True)
            self.deltas[i] = w.T.dot(self.deltas[i+1])*grad
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1./self.mbSize)*self.deltas[i].dot(self.hActs[i].T)
            self.grad[i][1] = (1./self.mbSize)*gp.sum(self.deltas[i],axis=1).reshape(-1,1)

            # add gaussian noise
            # self.grad[i][0] += .01 * gp.randn(self.grad[i][0].shape)
            # self.grad[i][1] += .01 * gp.randn(self.grad[i][1].shape)

        return cost,self.grad
Example #16
    def costAndGrad(self, data, labels):

        # forward prop
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = (1 / 2.) * (
                    self.hActs[i] + gp.sign(self.hActs[i]) * self.hActs[i])
            i += 1

        probs = self.hActs[-1] + gp.min(self.hActs[-1], axis=0)
        probs = gp.exp(probs)
        probs = probs / gp.sum(probs, axis=0)

        labelMat = np.zeros(probs.shape)
        labelMat[labels, range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1. / self.mbSize) * gp.sum(labelMat * gp.log(probs))

        if not self.train:
            return cost, None

        # back prop
        self.deltas[-1] = probs - labelMat
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * gp.sign(
                self.hActs[i + 1])
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)

        return cost, self.grad
Example #17
File: nnet.py  Project: awni/awni_ml
    def costAndGrad(self,data,labels):
        
        # forward prop
        self.hActs[0] = data
        i = 1
        for w,b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i-1])+b
            if i <= len(self.layerSizes):
                self.hActs[i] = (1/2.)*(self.hActs[i]+gp.sign(self.hActs[i])*self.hActs[i])
            i += 1

        probs = self.hActs[-1]+gp.min(self.hActs[-1],axis=0)
        probs = gp.exp(probs)
        probs = probs/gp.sum(probs,axis=0)

        labelMat = np.zeros(probs.shape)
        labelMat[labels,range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1./self.mbSize)*gp.sum(labelMat*gp.log(probs))

        if not self.train:
            return cost,None

        # back prop
        self.deltas[-1] = probs-labelMat
        i = len(self.layerSizes)-1
        for w,b in reversed(self.stack[1:]):
            self.deltas[i] = w.T.dot(self.deltas[i+1])*gp.sign(self.hActs[i+1])
            i -= 1
        
        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1./self.mbSize)*self.deltas[i].dot(self.hActs[i].T)
            self.grad[i][1] = (1./self.mbSize)*gp.sum(self.deltas[i],axis=1).reshape(-1,1)

        return cost,self.grad
Example #18
File: rbm.py  Project: surban/ml
 def metropolis_flip_sample(self, vis_start, iterations, beta=1, abeta=1):
     """Flips a randomly chosen bit and accepts the change if the
     resulting free energy is lower or with probability exp(-abeta*dE)
     where dE is the positive difference in energy. 
     Repeats for given iterations."""
     vis = vis_start.copy()
     fes = self.free_energy(vis)
     n_total_flips = 0
 
     for i in range(iterations):
         # flip a bit at random
         f = np.random.randint(0, vis.shape[1])
         vis_prop = vis.copy()
         vis_prop[:,f] = 1-vis[:,f]
     
         # calculate new free energy 
         fes_prop = self.free_energy(vis_prop, beta=beta)
         fes_diff = fes_prop - fes
     
          # accept if the free energy is lower, otherwise accept with probability exp(-abeta * dE)
         fes_smaller = fes_diff <= 0
         acc_p = fes_smaller + (1-fes_smaller) * gp.exp(-(1-fes_smaller)*abeta*fes_diff)
         acc_rng = gp.rand(acc_p.shape)
         acc = acc_rng <= acc_p
     
         # statistics
         n_flips = gp.sum(acc)
         n_total_flips += n_flips
     
         # compose new state
         acc_t = gp.tile(acc, (vis.shape[1], 1)).T
         vis = acc_t * vis_prop + (1-acc_t) * vis
         fes = acc * fes_prop + (1-acc) * fes
     
     #print "Total number of flips: ", n_total_flips
     return vis
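The acceptance probability assembled above reduces to the standard Metropolis rule min(1, exp(-abeta * dE)): when fes_diff <= 0 the fes_smaller mask forces acc_p to 1, and otherwise acc_p equals exp(-abeta * fes_diff).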
Example #19
def log_exp_sum(x, axis=1):
    x_max = x.max(axis=axis)
    if isinstance(x, gnp.garray):
        return (x_max + gnp.log(gnp.exp(x - x_max[:,gnp.newaxis]).sum(axis=axis))).asarray()
    else:
        return x_max + np.log(np.exp(x - x_max[:,np.newaxis]).sum(axis=axis))
Example #20
File: dnn.py  Project: C2Tao/HMM
 def forward(self):
     self.x = self.f(self.x)
     self.s = gpu.exp(gpu.dot(self.x, self.w) + self.b)
     self.s /= gpu.sum(self.s, 1).reshape(self.q, 1)
Example #21
 def exp(A):
     return gp.exp(A)
Example #22
 def sigmoid(x):
     den = 1.0 + gp.exp(-1.0 * x)
     d = 1.0 / den
     return d
Example #23
    def compute_kernel_matrix(self, x):
        x = x if isinstance(x, gnp.garray) else gnp.garray(x)
        xx = x.dot(x.T)
        x_diag = safe_diag(xx)

        return gnp.exp(-1.0 / (2 * self.sigma**2) * (-2 * xx + x_diag + x_diag[:,gnp.newaxis]))
Example #24
 def softmax_old(x):
     y = gp.max(x, axis=1)[:, gp.newaxis]
     logsumexp = y + gp.log(gp.sum((gp.exp(x - y)), axis=1))[:, gp.newaxis]
     return gp.exp(x - logsumexp)
Example #25
 def softmax1(A):
     Z = gp.exp(A)
     return Z / gp.sum(Z, axis=1)[:, gp.newaxis]
Example #26
def softmax(x):
    return gnp.exp(x) / gnp.exp(x).sum()
Example #27
 def forward(self):
     self.x = self.f(self.x)
     self.s = gpu.exp(gpu.dot(self.x, self.w) + self.b)
     self.s /= gpu.sum(self.s, 1).reshape(self.q, 1)
Example #28
def sigmoid(t):
    return 1. / (1. + gnp.exp(-t))
Example #29
File: ais.py  Project: surban/ml
 def base_p_vis(self, vis):
     "Probability of visible units in base rate RBM"
     punit = (gp.exp(gp.dot(self.base_bias_vis, vis)) / 
              (1 + gp.exp(self.base_bias_vis)))
     return gp.prod(punit, axis=1)
Example #30
File: ais.py  Project: surban/ml
 def base_partition_function(self):
     "Computes the partition function of the base rate RBM"
     part_vis = gp.prod(1 + gp.exp(self.base_bias_vis))
     part_hid = 2**self.rbm.n_hid
     return part_vis * part_hid
Example #31
def activation_softmax(x):
    result = x - g.max(x, axis=1)[:, g.newaxis]
    result = g.exp(result)
    result = result / g.sum(result, axis=1)[:, g.newaxis]
    return result
Example #32
def sigmoid(x):
    return 1. / (1 + gnp.exp(-x))
Example #33
 def softmax_grounded(b):
     z = gp.zeros((b.shape[0], 1))
     b_ = gp.concatenate((z, b), axis=1)
     y_ = gp.exp(b_)
     return y_ / (y_.sum(1)[:, gp.newaxis])
Example #34
 def output(self, A, Z=None):
     # Note: gnumpy does not have an expand_dims function
     amax = A.max(axis=1)
     Y = gnp.exp(A - amax.reshape(amax.size, 1))
     ysum = Y.sum(axis=1)
     return Y / ysum.reshape(ysum.size, 1)
Example #35
 def softmax(A):
     A -= gp.max(A, axis=1)[:, gp.newaxis]
     Z = gp.exp(A)
     return Z / gp.sum(Z, axis=1)[:, gp.newaxis]
Example #36
 def d_exp_penalty(x, sigma):
     return ((2 * (1 / sigma) * x * gp.exp(-x**2 / sigma)))
Example #37
def evaluate_unigram_partition(data, batch_size, num_steps, num_ensembles, eos_id, fp = 'simple-examples/ckpt/random training order unigram_partition_small/', probs_fn = 'test_set_probs_no_alpha.out'):
		epoch_size = ((len(data) // batch_size) - 1) // num_steps
		start_time = time.time()
		costs = 0.0
		iters = 0
		
		full_probs = []
		for i in range(num_ensembles):
				print(i)
				#full_probs[i] = np.loadtxt(fp + 'ensemble' + str(i+1) + '/test_set_probs_no_alpha.out', delimiter = ',')
				full_probs.append(np.asarray(pd.read_csv(fp + 'ensemble' + str(i+1) + '/' + probs_fn, delimiter = ',', header = None)))
				print(np.shape(full_probs[i]))
				# for ii in range(len(full_probs[i])):
				#     full_probs[i][ii] = full_probs[i][ii] / np.sum(full_probs[i][ii])

		print('reading in probs done')
		id_to_model = {}
		with open(fp + 'id_to_model.out', 'rb') as f:
				csv_reader = csv.reader(f, delimiter = ',', quotechar = '|')
				for row in csv_reader:
						row_list = [x for x in row if (x != '[' and x != ']' and x != '' and x != ' ')]
						#print(row_list)
						#row_list = row.split(',')
						row_list = [int(i) for i in row_list]
						id_to_model[row_list[0]] = row_list[1:len(row_list)]

		print('reading in id_to_model done')
		#print(id_to_model[1344])
		#probs = tf.nn.softmax(probs)
		# print(np.sum(probs[0]))
		# print(np.sum(probs[50]))
		#print(len(probs[0]))
		next_is_start_of_sentence = True
		flaggg = True
		#sent_list = reader.get_sentence_list(data = data, eos_id = eos_id)
		for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size,
																										num_steps)):
				if next_is_start_of_sentence:
						x = x[0,0]
						if x in id_to_model:
								models_included = id_to_model[x]
								coef = 1
						else:
								models_included = [1,2,3,4,5,6,7,8,9]
								coef = 1
				if x == eos_id:
						#cost = -1 * gpu.log(full_probs[0][step])
						models_included = [1]
						coef = 1
						next_is_start_of_sentence = True 
				else:
						next_is_start_of_sentence = False
						#coef = 0.5
						#models_included = id_to_model[x]
				probs = 0
				denom = 0
				for m in models_included:
						if m == 1:
								probs += full_probs[m-1][step]
								denom += 1
						else:
								#coef = 0.5
								probs += coef*full_probs[m-1][step]
								denom += coef
				probs = probs / float(denom)
				cost = -1 * gpu.log(probs)

				# print(step)
				# print(x)
				# print(y)
				# print(probs)
				#print(probs[0])
				#cost = -1 * gpu.log(probs[step][0,y[0,0]])
				#print(cost)
				'''
				loss = tf.nn.seq2seq.sequence_loss_by_example(
						[logits],
						[tf.reshape(y, [-1])],
						[tf.ones([batch_size * num_steps], dtype=tf.float64)])

				print(loss)
				cost = tf.reduce_sum(loss) / batch_size
				print(cost)
				'''
				costs += cost
				iters += num_steps
		
				if step % (epoch_size // 10) == 10:
						print("%.3f perplexity: %.3f speed: %.0f wps" %
								(step * 1.0 / epoch_size, gpu.exp(costs / iters),
								iters * batch_size / (time.time() - start_time)))

		return gpu.exp(costs / iters) 
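The returned value follows the usual perplexity definition: exp of the average per-step negative log-probability, i.e. perplexity = exp(costs / iters).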
Example #38
    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        y = gnp.exp(pred - pred.max(axis=1)[:,gnp.newaxis])
        y = y / y.sum(axis=1)[:,gnp.newaxis]

        return -(self.target * gnp.log(y + _SMALL_CONSTANT)).sum(), y - self.target
Example #39
 def sigmoid_prime(x):
     den = 1.0 + gp.exp(-1.0 * x)
     d = (gp.exp(-1.0 * x)) / den**2
     return d
Example #40
 def output(self, A, Z=None):
     # Note: gnumpy does not have an expand_dims function
     amax = A.max(axis=1)
     Y = gnp.exp(A - amax.reshape(amax.size,1))
     ysum = Y.sum(axis=1)
     return Y / ysum.reshape(ysum.size,1)
Example #41
def activation_softmax(x):
    result = x - g.max(x,axis=1)[:,g.newaxis]
    result = g.exp(result)
    result = result / g.sum(result,axis=1)[:,g.newaxis]
    return result
Example #42
 def exp_penalty(x, sigma):
     return x.shape[1] - ((gp.exp(-x**2 / sigma)).sum()) / x.shape[0]
Example #43
def sigmoid(z):
    return 1 / (1 + gnp.exp(-z))
Example #44
def sigmoid(t):
    return 1. / (1. + gnp.exp(-t))
Example #45
def tanh(x):
    return (gnp.exp(x) - gnp.exp(-x)) / (gnp.exp(x) + gnp.exp(-x))
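The explicit ratio above matches numpy's built-in tanh (a quick check with made-up values; note the explicit form can overflow in exp for large |x|):

import numpy as np

x = np.array([-3.0, 0.0, 3.0])
assert np.allclose((np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x)), np.tanh(x))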
Example #46
def sigmoid(z):

    return 1 / (1 + gpu.exp(-z))