Example #1
        def unit(parent_x, child_h, child_c, child_exists):
            (h_i, h_o,
             h_u), _ = theano.map(fn=lambda Ui, Uo, Uu, h, exists:
                                  (exists * T.dot(Ui, h), exists * T.dot(
                                      Uo, h), exists * T.dot(Uu, h)),
                                  sequences=[
                                      self.U_i, self.U_o, self.U_u, child_h,
                                      child_exists
                                  ])

            i = T.nnet.sigmoid(
                T.dot(self.W_i, parent_x) + h_i.sum(axis=0) + self.b_i)
            o = T.nnet.sigmoid(
                T.dot(self.W_o, parent_x) + h_o.sum(axis=0) + self.b_o)
            u = T.tanh(T.dot(self.W_u, parent_x) + h_u.sum(axis=0) + self.b_u)

            def _sub_f(U):
                sub_h_f, _ = theano.map(
                    fn=lambda sub_U, h, exists: exists * T.dot(sub_U, h),
                    sequences=[U, child_h, child_exists])
                return sub_h_f.sum(axis=0)

            h_f, _ = theano.map(fn=lambda U: _sub_f(U), sequences=[self.U_f])
            f = (T.nnet.sigmoid(
                T.dot(self.W_f, parent_x).dimshuffle('x', 0) + h_f +
                self.b_f.dimshuffle('x', 0)) * child_exists.dimshuffle(0, 'x'))

            c = i * u + T.sum(f * child_c, axis=0)
            h = o * T.tanh(c)
            return h, c
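For orientation, a minimal self-contained sketch of the map-over-children idiom used for h_i, h_o and h_u above (names and shapes here are illustrative, not from the original project):

import theano
import theano.tensor as T

# U stacks one weight matrix per child, h one hidden state per child,
# and exists is a 0/1 mask over (possibly padded) children.
U = T.tensor3('U')           # (n_children, dim, dim)
h = T.matrix('h')            # (n_children, dim)
exists = T.vector('exists')  # (n_children,)

out, _ = theano.map(fn=lambda Ui, hi, ei: ei * T.dot(Ui, hi),
                    sequences=[U, h, exists])
f = theano.function([U, h, exists], out.sum(axis=0))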
Example #3
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends,
                                 y_ends):
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                           T.eq(labels, shift_matrix(labels, 2))))

    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    rnn_outputs, _ = theano.map(select_probabilities,
                                [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )

        result = func(all_forward_probabilities, 0)
        return result

    forward_probabilities, _ = theano.scan(fn=forward_step,
                                           sequences=rnn_outputs,
                                           outputs_info=initial_state)
    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))

    def compute_cost(forward_probabilities, x_end, y_end):
        return -func(forward_probabilities[x_end - 1, y_end - 2:y_end])

    return theano.map(compute_cost, [forward_probabilities, x_ends, y_ends])[0]
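For reference, a minimal self-contained sketch of the log-space forward recursion pattern above. func is presumably a reduction such as log-sum-exp or max over axis 0, and log_shift_matrix a log-space shift, so T.roll and an explicit log-sum-exp stand in here (shapes assumed):

import theano
import theano.tensor as T

probs = T.matrix('probs')    # (time, states), already in log space

def forward_step(p_t, alpha_tm1):
    stacked = T.stack(alpha_tm1 + p_t,
                      T.roll(alpha_tm1, 1) + p_t)
    m = stacked.max(axis=0)
    return m + T.log(T.exp(stacked - m).sum(axis=0))  # log-sum-exp over axis 0

alpha, _ = theano.scan(forward_step, sequences=probs,
                       outputs_info=T.zeros_like(probs[0]))
f = theano.function([probs], alpha[-1])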
Example #4
File: ctc.py Project: choko/ctc
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends, y_ends):
	mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)), T.eq(labels, shift_matrix(labels, 2))))

	initial_state = T.log(T.zeros_like(labels))
	initial_state = T.set_subtensor(initial_state[:,0], 0)

	def select_probabilities(rnn_outputs, label):
		return rnn_outputs[:,label]	

	rnn_outputs, _ = theano.map(select_probabilities, [original_rnn_outputs, labels])
	rnn_outputs = T.log(rnn_outputs.dimshuffle((1,0,2)))

	def forward_step(probabilities, last_probabilities):
		all_forward_probabilities = T.stack(
			last_probabilities + probabilities,
			log_shift_matrix(last_probabilities, 1) + probabilities,
			log_shift_matrix(last_probabilities, 2) + probabilities + mask,
		)

		result = func(all_forward_probabilities, 0)
		return result

	forward_probabilities, _ = theano.scan(fn = forward_step, sequences = rnn_outputs, outputs_info = initial_state)
	forward_probabilities = forward_probabilities.dimshuffle((1,0,2))

	def compute_cost(forward_probabilities, x_end, y_end):
		return -func(forward_probabilities[x_end-1,y_end-2:y_end])

	return theano.map(compute_cost, [forward_probabilities, x_ends, y_ends])[0]
Example #5
def compute_cost_with_cross_entropy_in_parallel(original_rnn_outputs, labels,
                                                x_ends, y_ends):
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                           T.eq(labels, shift_matrix(labels, 2))))
    arange = T.arange(labels.shape[1])

    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    rnn_outputs, _ = theano.map(select_probabilities,
                                [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )

        max_probability, backlink = T.max_and_argmax(all_forward_probabilities,
                                                     0)
        backlink = arange - backlink
        return max_probability, backlink

    results, _ = theano.scan(fn=forward_step,
                             sequences=rnn_outputs,
                             outputs_info=[initial_state, None])
    forward_probabilities, backward_pointers = results

    def compute_cost(rnn_outputs, forward_probabilities, backward_pointers,
                     x_end, y_end, label):
        def backward_step(backlinks, position):
            new_position = backlinks[position]
            return new_position, position

        initial_state = T.argmax(
            forward_probabilities[x_end - 1, y_end - 2:y_end]) + y_end - 2

        results, _ = theano.scan(fn=backward_step,
                                 sequences=backward_pointers[0:x_end, :],
                                 outputs_info=[initial_state, None],
                                 go_backwards=True)
        alignment = label[results[1][::-1]]

        return aggregate(categorical_crossentropy(rnn_outputs[0:x_end],
                                                  alignment),
                         mode='sum')

    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))
    backward_pointers = backward_pointers.dimshuffle((1, 0, 2))

    return theano.map(compute_cost, [
        original_rnn_outputs, forward_probabilities, backward_pointers, x_ends,
        y_ends, labels
    ])[0]
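A small sketch of the T.max_and_argmax step that drives the Viterbi-style recursion above (shapes assumed): axis 0 indexes the stacked transition hypotheses, so backlink records which predecessor won at each position.

import theano
import theano.tensor as T

stacked = T.tensor3('stacked')   # (3, batch, labels)
best, backlink = T.max_and_argmax(stacked, 0)
f = theano.function([stacked], [best, backlink])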
Example #6
    def compute_objective_and_gradients(self, nSamp):
        hsamp = self.mrec.getSample(self.Y, nSamp)

        # evaluate the generative model density P_\theta(y_i , h_i)
        p_yh, _ = theano.map(self.mprior.evaluateLogDensity, sequences=hsamp)
        # evaluate the recognition model density Q_\phi(h_i | y_i)
        q_hgy, _ = theano.map(self.mrec.evalLogDensity, sequences=hsamp)

        ff = (p_yh - q_hgy)
        sortidx = ff.argsort(axis=0)

        fmax = ff[(sortidx[-1], T.arange(ff.shape[-1]))].dimshuffle('x', 0)

        f_hy = T.exp(ff - fmax)
        sum_across_samples = f_hy.sum(axis=0, keepdims=True)
        Lhat = T.log(sum_across_samples / nSamp) + fmax

        col_idx = T.arange(ff.shape[-1])
        # Compute cross-validated estimates of Lhat. The commented-out 1e-12
        # constant is for debugging NaNs in other parts of the code: we know
        # we'll get NaNs there and then overwrite them, so use it with
        # nanguard mode.
        hold_out_except_last = T.log(
            (sum_across_samples - f_hy) / (nSamp - 1)) + fmax  # (+1e-12) + fmax
        f2max_vec = ff[(sortidx[-2], T.arange(ff.shape[-1]))]
        f2max = f2max_vec.dimshuffle('x', 0)
        # Do tricky things to keep the numerics in order (avoid a term being \approxeq 0)
        ff_nolast = T.set_subtensor(ff[(sortidx[-1], col_idx)], f2max_vec)
        f_hy_last = T.exp(ff_nolast - f2max)
        # compute held-out sum when we hold out the maximum element
        hold_out_last = T.log(
            (f_hy_last.sum(axis=0, keepdims=True) - f_hy_last) /
            (nSamp - 1)) + f2max
        # compute final held-out estimates
        hold_out = T.set_subtensor(
            hold_out_except_last[(sortidx[-1], col_idx)],
            hold_out_last[(sortidx[-1], col_idx)])

        Lhat_cv = Lhat - hold_out
        the_ws = f_hy / sum_across_samples

        weighted_q = T.sum((Lhat_cv * q_hgy + the_ws * ff).mean(axis=1))
        #weighted_q = T.sum((Lhat_cv*q_hgy + the_ws*(p_yh-q_hgy)).sum(axis=1))

        # gradients for approximate posterior
        dqhgy = T.grad(cost=weighted_q,
                       wrt=self.mrec.getParams(),
                       consider_constant=([the_ws, Lhat_cv] + hsamp),
                       disconnected_inputs='ignore')

        # gradients for prior
        dpyh = T.grad(cost=T.sum((the_ws * ff).mean(axis=1)),
                      wrt=self.mprior.getParams(),
                      consider_constant=hsamp + [the_ws],
                      disconnected_inputs='ignore')
        #dpyh = T.grad(cost=T.sum((the_ws*(p_yh-q_hgy)).sum(axis=1)), wrt = self.mprior.getParams(), consider_constant=hsamp + [the_ws], disconnected_inputs='ignore')

        return [Lhat.mean(), dpyh, dqhgy]
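The fmax subtraction above is the standard log-sum-exp stabilization; in isolation, a sketch with assumed shapes, using mean to mirror the division by nSamp:

import theano
import theano.tensor as T

ff = T.matrix('ff')   # (samples, batch), log-densities
fmax = ff.max(axis=0, keepdims=True)
Lhat = T.log(T.exp(ff - fmax).mean(axis=0, keepdims=True)) + fmax
f = theano.function([ff], Lhat)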
Example #7
def make_nade(D, z_dim):
    log("make_nade with D={},z_dim={},g={}".format(D, z_dim, g))
    x = T.fmatrix('x')

    c_vals = np.random.normal(0, 1, size=(1, z_dim)).astype('float32')
    c = theano.shared(c_vals, name="c")
    p_x = 1

    def a_adder(W_col_T, x_i, acc):
        W_col_T.name = "W_col_T"
        prod = W_col_T * T.sum(x_i)
        prod.name = "prod"
        ret_T = acc.T + prod
        return ret_T.T

    """
    for i in range(D):
        W_col_vals = np.random.normal(0,1,size=(z_dim,1)).astype('float32')
        W_col = theano.shared(W_col_vals,name="W_col_%d"%(i+1))
        W_cols.append(W_col)
    """
    W_vals = np.random.normal(0, 1, size=(z_dim, D)).astype('float32')
    W = theano.shared(W_vals, name="W")

    a_s_W, _u = theano.scan(fn=a_adder,
                            outputs_info=c[0, :],
                            sequences=[W.T, x])
    a_s_excess = T.concatenate([c, a_s_W], axis=0)
    a_s = a_s_excess[:D, :]

    V_vals = np.random.normal(0, 1, size=(D, z_dim)).astype('float32')
    V = theano.shared(V_vals, name="V")

    hs = g(a_s)

    b_val = np.random.normal(0, 1, size=(D, 1)).astype('float32')
    b = theano.shared(b_val, name="b")

    def scan_p_x_cond(V_row, hi, b_i):
        p_x_cond = g(T.dot(V_row, hi) + b_i)
        return p_x_cond

    p_x_cond, _u = theano.map(fn=scan_p_x_cond, sequences=[V, hs, b])

    def scan_p_x_cond_obs(x_i, p):
        ret = x_i * p + (1 - x_i) * (1 - p)
        return ret

    p_x_cond_obs, _u = theano.map(fn=scan_p_x_cond_obs,
                                  sequences=[x, p_x_cond])

    nll = -T.sum(T.log(p_x_cond_obs))

    p_x = T.prod(p_x_cond_obs)

    return (W, c, V, b), x, hs, p_x, nll, p_x_cond
Example #8
    def step(visible, filtered_hidden_mean_m1, filtered_hidden_cov_m1):
        A, B = transition, emission  # (h, h), (h, v)

        # Shortcuts for the filtered mean and covariance from the previous
        # time step.
        f_m1 = filtered_hidden_mean_m1  # (n, h)
        F_m1 = filtered_hidden_cov_m1  # (n, h, h)

        # Calculate mean of joint.
        hidden_mean = T.dot(f_m1, A) + hnm  # (n, h)

        visible_mean = T.dot(hidden_mean, B) + vnm  # (n, v)

        # Calculate covariance of joint.
        hidden_cov = stacked_dot(A.T, stacked_dot(F_m1, A))  # (n, h, h)

        hidden_cov += hnc

        visible_cov = stacked_dot(  # (n, v, v)
            B.T, stacked_dot(hidden_cov, B))
        visible_cov += vnc

        visible_hidden_cov = stacked_dot(hidden_cov, B)  # (n, h, v)

        visible_error = visible - visible_mean  # (n, v)

        inv_visible_cov, _ = theano.map(lambda x: matrix_inverse(x),
                                        visible_cov)  # (n, v, v)

        # I don't know a better name for this monster.
        visible_hidden_cov_T = visible_hidden_cov.dimshuffle(0, 2,
                                                             1)  # (n, v, h)
        D = stacked_dot(inv_visible_cov, visible_hidden_cov_T)

        f = (
            D * visible_error.dimshuffle(0, 1, 'x')  # (n, h)
        ).sum(axis=1)
        f += hidden_mean

        F = hidden_cov
        F -= stacked_dot(visible_hidden_cov, D)

        log_l = (
            inv_visible_cov *  # (n,)
            visible_error.dimshuffle(0, 1, 'x') *
            visible_error.dimshuffle(0, 'x', 1)).sum(axis=(1, 2))
        log_l *= -.5

        dets, _ = theano.map(lambda x: det(x), visible_cov)

        log_l -= 0.5 * T.log(dets)
        log_l -= np.log(2 * np.pi)

        return f, F, log_l
Example #9
File: lds.py Project: ddofer/breze
    def step(visible, filtered_hidden_mean_m1, filtered_hidden_cov_m1):
        A, B = transition, emission                         # (h, h), (h, v)

        # Shortcuts for the filtered mean and covariance from the previous
        # time step.
        f_m1 = filtered_hidden_mean_m1                      # (n, h)
        F_m1 = filtered_hidden_cov_m1                       # (n, h, h)

        # Calculate mean of joint.
        hidden_mean = T.dot(f_m1, A) + hnm                  # (n, h)

        visible_mean = T.dot(hidden_mean, B) + vnm          # (n, v)

        # Calculate covariance of joint.
        hidden_cov = stacked_dot(
            A.T, stacked_dot(F_m1, A))                      # (n, h, h)

        hidden_cov += hnc

        visible_cov = stacked_dot(                          # (n, v, v)
            B.T, stacked_dot(hidden_cov, B))
        visible_cov += vnc

        visible_hidden_cov = stacked_dot(hidden_cov, B)     # (n, h, v)

        visible_error = visible - visible_mean              # (n, v)

        inv_visible_cov, _ = theano.map(
            lambda x: matrix_inverse(x), visible_cov)       # (n, v, v)

        # I don't know a better name for this monster.
        visible_hidden_cov_T = visible_hidden_cov.dimshuffle(0, 2, 1)   # (n, v, h)
        D = stacked_dot(inv_visible_cov, visible_hidden_cov_T)

        f = (D * visible_error.dimshuffle(0, 1, 'x')        # (n, h)
            ).sum(axis=1)
        f += hidden_mean

        F = hidden_cov
        F -= stacked_dot(visible_hidden_cov, D)

        log_l = (inv_visible_cov *                          # (n,)
            visible_error.dimshuffle(0, 1, 'x') *
            visible_error.dimshuffle(0,'x', 1)).sum(axis=(1, 2))
        log_l *= -.5

        dets, _ = theano.map(lambda x: det(x), visible_cov)

        log_l -= 0.5 * T.log(dets)
        log_l -= np.log(2 * np.pi)

        return f, F, log_l
Example #10
File: boew.py Project: zbxzc35/boew
 def sym_histograms(self, X, masks=None):
     """
     Encodes a set of objects (X is a tensor3)
     :param X: tensor3 containing the feature vectors for each object
     :return:
     """
     if masks is None:
         histograms, updates = theano.map(self.sym_histogram,
                                          sequences=(X, ))
     else:
         histograms, updates = theano.map(self.sym_histogram,
                                          sequences=(X, masks))
     return histograms
Example #11
    def createObjectiveFunction(self):
        '''
        @description: initialize objective function and minimization function
        @X,y data matrix/vector
        @u random noise for simulator
        @v standard normal for reparametrization trick
        '''
        y = T.dvector("y")
        W, U = T.dvectors("W", "U")
        V = T.dscalar("V")

        mu = self.params[0]
        #logSigma = self.params[1]
        logSigma = sharedX(0.6)
        logLambda = sharedX(0)
        #self.params[2]

        negKL = 0.5 * self.dimTheta + 0.5 * T.sum(2 * logSigma - mu**2 -
                                                  T.exp(logSigma)**2)

        results, updates = th.map(fn=self.alpha_stable,
                                  sequences=[W, U],
                                  non_sequences=[V])
        f = results
        results2, updates2 = th.map(fn=self.alpha_perfect, sequences=[W, U])
        f2 = results2

        #SSE = T.sum((y-f)**2)
        logLike = -self.m * (
            0.5 * np.log(2 * np.pi) + logLambda) - 0.5 * T.sum(
                (T.flatten(y) - T.flatten(f))**2) / (T.exp(logLambda)**2)
        #logLike2 = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f2)**2)/(T.exp(logLambda)**2)

        elbo = (negKL + logLike)
        #elbo2 = (negKL + logLike2)
        obj = -elbo
        #obj = SSE

        self.f = th.function([y, W, U, V],
                             f,
                             updates=updates,
                             on_unused_input='ignore')
        self.lowerboundfunction = th.function([y, W, U, V],
                                              obj,
                                              updates=updates,
                                              on_unused_input='ignore')
        derivatives = T.grad(obj, self.params)
        self.gradientfunction = th.function([y, W, U, V],
                                            derivatives,
                                            updates=updates,
                                            on_unused_input='ignore')
Example #12
 def set_output(self):
     cshape = self.camera_params.shape  # (c, batch, 11)
     voxel = self._input
     voxel_tiled = tensor.tile(voxel, (cshape[0], 1, 1, 1, 1))
     cams = tensor.reshape(self.camera_params,
                           (cshape[0] * cshape[1], cshape[2]))
     camlocs = theano.map(self.get_camloc, cams)[0].astype('float32')
     raydirs = theano.map(self.get_raydirs, cams)[0].astype('float32')
     rendered = self.op(voxel_tiled, camlocs, raydirs)
     rendered_sub = theano.map(self.data_augmentation, [rendered, cams])[0]
     output_shape = (cshape[0], cshape[1], self.img_h, self.img_w,
                     self.feat_d)
     self._output = tensor.reshape(rendered_sub, output_shape)
     self._output = self._output.dimshuffle(0, 1, 4, 2, 3)
Example #13
	def pv_function(self, tensor_input):
		indexf_matrix = theano.shared(
									np.zeros(
										[self.max_length, self.max_length], 
										dtype=np.int32
										),
									name = 'indexf_matrix',
									borrow=True
									)
		
		pf_matrix = theano.shared(
								np.zeros(
										[self.max_length, self.max_length], 
										dtype=theano.config.floatX
										),
								name = 'pf_matrix',
								borrow=True
								)
		pf_matrix = T.set_subtensor(pf_matrix[0, 0:tensor_input.shape[0]], 1.0)
		
		vf_matrix = theano.shared(
								np.zeros(
										(self.max_length, self.max_length, self.size), 
										dtype=theano.config.floatX
										),
								name = 'vf_matrix',
								borrow=True
								)
		results, updates = theano.map(
				fn = lambda i, L, t_tensor_input: L[t_tensor_input[i]],
				sequences=[T.arange(tensor_input.shape[0])],
				non_sequences=[self.L, tensor_input],
				name = 'vf_matrix prepare'
				)
		vf_matrix = T.set_subtensor(vf_matrix[0, 0:tensor_input.shape[0]], results)
		
		for i in range(1,self.max_length):
			results, updates = theano.map(
				fn = self._pv_function,
				sequences=[T.arange(self.max_length-i)],
				non_sequences = [i, pf_matrix, vf_matrix],
				#name = 'pv function'
				)
			
			indexf_matrix = T.set_subtensor(indexf_matrix[i, 0:self.max_length-i], results[0])
			pf_matrix = T.set_subtensor(pf_matrix[i, 0:self.max_length-i], results[1])
			vf_matrix = T.set_subtensor(vf_matrix[i, 0:self.max_length-i], results[2])
			
		return indexf_matrix, pf_matrix, vf_matrix
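One detail worth noting above: T.set_subtensor is functional and returns a new variable, so its result must be reassigned, as pf_matrix and vf_matrix are on every iteration. A minimal sketch:

import numpy as np
import theano
import theano.tensor as T

m = T.zeros((3, 4))
row = T.vector('row')
m = T.set_subtensor(m[0, 0:row.shape[0]], row)
f = theano.function([row], m)
print(f(np.arange(2, dtype=theano.config.floatX)))  # first row becomes [0, 1, 0, 0]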
Example #15
 def call(self, x, mask):
     x_switched = K.switch(mask[:, :, None], x, 0.0)
     activation_ranks = theano.map(rank_function, x_switched)[0]
     activation_energies = K.switch(mask[:, None, :maxsents],
                                    activation_ranks, -1e20)
     activation_weights = theano.map(K.softmax, activation_energies)[0]
     base_values = (mask * ((K.sum(mask[:, :maxsents] + 0.0, axis=-1))**
                            -1)[:, None])[:, None, :maxsents]
     pad_weights = K.concatenate(
         (base_values, activation_weights[:, :-1, :]), axis=1)
     diff_weights = activation_weights - pad_weights
     posi_diffs = K.switch(diff_weights > 0, diff_weights, 0.0)
     norm_pds = (K.sum(posi_diffs, axis=-1) + K.epsilon())**-1
     attentions = posi_diffs * norm_pds[:, :, None]
     return attentions
Example #16
    def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
        squashed = T.reshape(activations, (-1,self.RAW_ENCODING_WIDTH))
        n_parallel = squashed.shape[0]
        probs = T.nnet.softmax(squashed)


        def _scan_fn(cprobs, cpos):

            if self.with_artic:
                abs_probs = cprobs[:2]
                rel_probs = cprobs[2:]
            else:
                rel_probs = cprobs
                abs_probs = T.ones((2,))

            aligned = T.roll(rel_probs, (cpos-low_bound)%12)

            num_tile = int(math.ceil((high_bound-low_bound)/self.WINDOW_SIZE))

            tiled = T.tile(aligned, (num_tile,))[:(high_bound-low_bound)]

            full = T.concatenate([abs_probs, tiled], 0)
            return full

        # probs = theano.printing.Print("probs",['shape'])(probs)
        # relative_position = theano.printing.Print("relative_position",['shape'])(relative_position)
        from_scan, _ = theano.map(fn=_scan_fn, sequences=[probs, T.flatten(relative_position)])
        # from_scan = theano.printing.Print("from_scan",['shape'])(from_scan)
        newshape = T.concatenate([activations.shape[:-1],[2+high_bound-low_bound]],0)
        fixed = T.reshape(from_scan, newshape, ndim=activations.ndim)
        return fixed
Example #17
    def apply(self, image, image_shape, location, scale):
        a, b = self.compute_hard_windows(image_shape, location, scale)

        if self.batched_window:
            patch = self.apply_inner(image, location, scale, a[0], b[0])
        else:

            def map_fn(image, image_shape, a, b, location, scale):
                # apply_inner expects a batch axis
                image = T.shape_padleft(image)
                location = T.shape_padleft(location)
                scale = T.shape_padleft(scale)

                patch = self.apply_inner(image, location, scale, a, b)

                # return without batch axis
                return patch[0]

            patch, _ = theano.map(map_fn,
                                  sequences=[image, a, b, location, scale])

        savings = (1 - T.cast(
            (b - a).prod(axis=1), floatX) / image_shape.prod(axis=1))
        self.add_auxiliary_variable(savings, name="savings")

        return patch
Example #18
    def get_output_for(self, input, **kwargs):
        def sample_one_image(img, y, x):
            return theano.map(
                lambda x, y, image: image[:, y:(y + self.patch_size[0]), x:
                                          (x + self.patch_size[1])],
                sequences=[x, y],
                non_sequences=img)[0]

        if self.pad:
            shp = (input.shape[0], input.shape[1],
                   input.shape[2] + self.patch_size[0] * 2 - 2,
                   input.shape[3] + self.patch_size[1] * 2 - 2)

            padded_input = T.zeros(shp)
            padded_input = T.set_subtensor(
                padded_input[:, :, (self.patch_size[0] -
                                    1):(-self.patch_size[0] + 1),
                             (self.patch_size[1] - 1):(-self.patch_size[1] +
                                                       1)], input)

            input = padded_input

        y = self.rng.random_integers(size=(input.shape[0],
                                           self.patches_per_example),
                                     low=0,
                                     high=input.shape[2] - self.patch_size[0])
        x = self.rng.random_integers(size=(input.shape[0],
                                           self.patches_per_example),
                                     low=0,
                                     high=input.shape[3] - self.patch_size[1])

        return theano.map(sample_one_image,
                          sequences=[input, y, x])[0].reshape(
                              (-1, input.shape[1], self.patch_size[0],
                               self.patch_size[1]))
Example #19
    def get_reward_sequences(self, env_state_sessions, agent_action_sessions):
        """Computes the rewards given to agent at each time step for each batch.

        :param env_state_sessions: Environment state [batch_i,seq_i,state_units] history for all sessions.
        :type env_state_sessions: theano tensor [batch_i,seq_i,state_units]

        :param agent_action_sessions: Actions chosen by agent at each tick for all sessions.
        :type agent_action_sessions: int[batch_i,seq_i]

        :return rewards: What reward was given to an agent for corresponding action from state in that batch.
        :rtype: float[batch_i,seq_i]
        """
        env_state_sessions = check_list(env_state_sessions)
        n_states = len(env_state_sessions)
        agent_action_sessions = check_list(agent_action_sessions)
        n_actions = len(agent_action_sessions)

        def compute_reward(batch_i, *args):
            session_states, session_actions = unpack_list(args, [n_states, n_actions])
            return self.get_reward(session_states, session_actions, batch_i)

        sequences = [T.arange(agent_action_sessions[0].shape[0], ), ] + env_state_sessions + agent_action_sessions

        rewards, updates = theano.map(compute_reward, sequences=sequences)

        assert len(updates) == 0
        return rewards.reshape(agent_action_sessions[0].shape)  # reshape back to original
Example #20
def cosine_similarity(x, y, eps=1e-6):
    r"""
    Cosine similarity between a vector and each row of a base matrix.

    Parameters
    ----------
    x: a 1D Theano variable
        Vector to compare to each row of the matrix y.
    y: a 2D Theano variable
        Matrix to be compared to
    eps: float
        Precision of the operation (necessary for differentiability).

    Return
    ------
    z: a 1D Theano variable
        A vector whose components are the cosine similarities
        between x and each row of y.
    """
    def _cosine_similarity(x, y, eps=1e-6):
        y = y.dimshuffle(1, 0)
        z = T.dot(x, y)
        z /= T.sqrt(T.sum(x * x) * T.sum(y * y, axis=0) + eps)

        return z

    def step(x_b, y_b):
        return _cosine_similarity(x_b, y_b, eps)
    z, _ = theano.map(step, sequences=[x, y])

    return z
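A usage sketch, assuming the definition above is in scope: theano.map iterates over the leading axis of both sequences, so the compiled function takes a batch of vectors x and a batch of matrices y even though the docstring describes a single pair.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')    # (batch, dim)
y = T.tensor3('y')   # (batch, rows, dim)
f = theano.function([x, y], cosine_similarity(x, y))
print(f(np.ones((2, 4), dtype=theano.config.floatX),
        np.ones((2, 3, 4), dtype=theano.config.floatX)))  # all ~1.0, shape (2, 3)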
Example #21
    def gen_full_alignment(self):

        # Get only the focus columns
        for seq_name,sequence in self.seq_name_to_sequence.items():
            # Replace periods with dashes (the uppercase equivalent)
            sequence = sequence.replace(".","-")

            #then get only the focus columns
            self.seq_name_to_sequence[seq_name] = [sequence[ix].upper() for ix in self.focus_cols]

        # Remove sequences that have bad characters
        alphabet_set = set(list(self.alphabet))
        seq_names_to_remove = []
        for seq_name,sequence in self.seq_name_to_sequence.items():
            for letter in sequence:
                if letter not in alphabet_set and letter != "-":
                    seq_names_to_remove.append(seq_name)

        seq_names_to_remove = list(set(seq_names_to_remove))
        for seq_name in seq_names_to_remove:
            del self.seq_name_to_sequence[seq_name]

        # Encode the sequences
        print ("Encoding sequences")
        self.x_train = np.zeros((len(self.seq_name_to_sequence.keys()),len(self.focus_cols),len(self.alphabet)))
        self.x_train_name_list = []
        for i,seq_name in enumerate(self.seq_name_to_sequence.keys()):
            sequence = self.seq_name_to_sequence[seq_name]
            self.x_train_name_list.append(seq_name)
            for j,letter in enumerate(sequence):
                if letter in self.aa_dict:
                    k = self.aa_dict[letter]
                    self.x_train[i,j,k] = 1.0


        # Fast sequence weights with Theano
        if self.calc_weights:
            print ("Computing sequence weights")
            # Numpy version
            # import scipy
            # from scipy.spatial.distance import pdist, squareform
            # self.weights = scale / np.sum(squareform(pdist(seq_index_array, metric="hamming")) < theta, axis=0)
            #
            # Theano weights
            X = T.tensor3("x")
            cutoff = T.scalar("theta")
            X_flat = X.reshape((X.shape[0], X.shape[1]*X.shape[2]))
            N_list, updates = theano.map(lambda x: 1.0 / T.sum(T.dot(X_flat, x) / T.dot(x, x) > 1 - cutoff), X_flat)
            weightfun = theano.function(inputs=[X, cutoff], outputs=[N_list],allow_input_downcast=True)
            #
            self.weights = weightfun(self.x_train, self.theta)[0]

        else:
            # If not using weights, use an isotropic weight matrix
            self.weights = np.ones(self.x_train.shape[0])

        self.Neff = np.sum(self.weights)

        print ("Neff =",str(self.Neff))
        print ("Data Shape =",self.x_train.shape)
Example #22
    def get_reward_sequences(self,env_state_sessions,agent_action_sessions):
        """
        Computes the rewards given to the agent at each time step for each batch.
        parameters:
            env_state_seq - environment state [batch_i,seq_i,state_units] history for all sessions
            agent_action_seq - int[batch_i,seq_i]
        returns:
            rewards float[batch_i,seq_i] - what reward was given to an agent for corresponding action from state in that batch

        """
        
        
        
        def compute_reward(batch_i,session_states,session_actions):
            return self.get_reward(session_states,session_actions,batch_i)



        sequences = [
            T.arange(env_state_sessions.shape[0],),
            env_state_sessions,
            agent_action_sessions,
        ]

        rewards,updates = theano.map(compute_reward,
                              sequences=sequences)
        assert len(updates)==0
        return rewards.reshape(agent_action_sessions.shape)  # reshape back to original
Example #23
 def connect(self, S):
     self.S = S
     def step(s_current, h_prev):
         h_t = self.activation(
             T.dot(s_current, self.W_ih) + 
             T.dot(h_prev, self.W_hh)
         )
         y_t = self.activation(
             T.dot(h_t, self.W_ho)
         )
         return h_t, y_t
     [self.H, self.output], _ = theano.scan(
         step,
         sequences = self.S,
         outputs_info = [self.h_init, None]
     )
     
     self.prediction, _ = theano.map(
         lambda x: T.argmax(x),
         sequences = self.output
     )
     self.final_state = self.H[self.H.shape[0] - 1]
     self.outputter = theano.function([self.S], self.output)
     self.predicter = theano.function([self.S], self.prediction)
     self.CONNECTED = True
Example #24
def gaussian_filter_2d_variable_sigma(input,
                                      sigmas,
                                      window_radius=None,
                                      border_mode='zero'):
    def filter_sigma(idx, kernel):
        dimpattern_w = ('x', 'x', 'x', 0)
        dimpattern_h = ('x', 'x', 0, 'x')
        filter_w = kernel.dimshuffle(dimpattern_w)
        blur_w = T.nnet.conv2d(padded_input[idx:idx + 1],
                               filter_w,
                               border_mode=_get_chained_w_h_conv_border(
                                   conv_border, 'w'),
                               filter_shape=[1, 1, 1, None])
        filter_h = kernel.dimshuffle(dimpattern_h)
        return T.nnet.conv2d(blur_w,
                             filter_h,
                             border_mode=_get_chained_w_h_conv_border(
                                 conv_border, 'h'),
                             filter_shape=[1, 1, None, 1])

    ndim = 4
    assert input.ndim == ndim, \
        "there must be {} dimensions, got {}".format(ndim, input.ndim)
    window_radius = gaussian_kernel_default_radius(sigmas, window_radius)
    padded_input, conv_border = add_border(input, window_radius, border_mode)
    kernel = gaussian_kernel_1d(sigmas, window_radius)
    blur, _ = theano.map(filter_sigma,
                         sequences=[T.arange(sigmas.shape[0]), kernel])
    return blur.reshape(input.shape)
Example #25
 def getSample(self, Y, nSamp = 1):    
     def get_layers(ii):
         output = lasagne.layers.get_output(lasagne.layers.get_all_layers(self.sbn_nn), inputs = Y)
         return output[::-1]
     
     output,_ = theano.map(get_layers, T.arange(nSamp))
     return output
Example #26
def compile_vime_reward(l_prediction,l_prev_states,l_actions,weights,
                        get_loss = lambda pred,real: T.mean((pred-real)**2),
                        n_samples = 1,
                        delta=0.01,**kwargs):
    """compiles a function that predicts vime reward for each state in a batch"""
    prev_states = T.matrix("previous states")
    actions = T.ivector("actions")
    next_states = T.matrix("next states")
    if n_samples ==1:
        get_bnn = lambda state,action: lasagne.layers.get_output(l_prediction,
                                                     inputs={l_prev_states:state[None,:],
                                                             l_actions:action[None]},**kwargs)
    else:
        get_bnn = lambda state,action: sample_output(l_prediction,
                                                     input_dict={l_prev_states:state[None,:],
                                                                 l_actions:action[None]},
                                                     n_samples=n_samples,**kwargs)
        
    
    vime_reward_per_state,auto_updates = theano.map(lambda s,a,s_next: get_r_vime_on_state(weights,
                                                                              get_loss(get_bnn(s,a),s_next),
                                                                              delta),
                                       sequences=[prev_states,actions,next_states])
    
    return theano.function([prev_states,actions,next_states],vime_reward_per_state,
                           updates=auto_updates,allow_input_downcast=True)
Example #27
    def test_function5(self):

        w = theano.shared(1.0, name="w")


        def joke(a, b):
            k = w * a
            # g = 0.01 * T.grad((k - 1)**2, w)
            return k, {w: w - 1.0}

        x = T.dscalar("x")
        hs, _ = theano.scan(joke, sequences=[np.array([1.0, 2.0, 3.0])], outputs_info=[np.float64(1.0)] )

        print(hs, _)

        def upd(h):
            return T.grad(hs[h], w)

        gs, up = theano.map(upd, sequences=[T.arange(hs.shape[0])])
        print(gs, up)
        # print hs, _
        # print gs, up

        func = theano.function(inputs=[], outputs=gs, updates=[])
        print(func())
        print(w.get_value())
Example #28
 def build_validation(X, Y):
     def myscanfunc(ind):
         X_ = X[ind*megabatch_size:(ind+1)*megabatch_size]
         Y_ = Y[ind*megabatch_size:(ind+1)*megabatch_size]
         return simple_build_likelihood(X_, Y_, ind=T.mod(pool_ind+ind, param_pool_size))
     result = theano.map(myscanfunc, sequences=[T.arange(T.max([1, X.shape[0] / megabatch_size]))])[0]
     return T.sum(result) * dataset_size / X.shape[0] - _KLD
Example #29
    def compute_tree(self, emb_x, tree):
        self.recursive_unit = self.create_recursive_unit()
        self.leaf_unit = self.create_leaf_unit()
        num_nodes = tree.shape[0]  # num internal nodes
        num_leaves = self.num_words - num_nodes

        # compute leaf hidden states
        leaf_h, _ = theano.map(fn=self.leaf_unit,
                               sequences=[emb_x[:num_leaves]])
        if self.irregular_tree:
            init_node_h = T.concatenate([leaf_h, leaf_h], axis=0)
        else:
            init_node_h = leaf_h

        # use recurrence to compute internal node hidden states
        def _recurrence(cur_emb, node_info, t, node_h, last_h):
            child_exists = node_info > -1
            offset = num_leaves * int(self.irregular_tree) - child_exists * t
            child_h = node_h[node_info + offset] * child_exists.dimshuffle(
                0, 'x')
            parent_h = self.recursive_unit(cur_emb, child_h, child_exists)
            node_h = T.concatenate(
                [node_h, parent_h.reshape([1, self.hidden_dim])])
            return node_h[1:], parent_h

        dummy = theano.shared(self.init_vector([self.hidden_dim]))
        (_, parent_h), _ = theano.scan(
            fn=_recurrence,
            outputs_info=[init_node_h, dummy],
            sequences=[emb_x[num_leaves:], tree,
                       T.arange(num_nodes)],
            n_steps=num_nodes)

        return T.concatenate([leaf_h, parent_h], axis=0)
Example #31
def gradSort(met, X):
    # -----------Start Batch Loop------------
    m = T.fvector()
    x = T.fmatrix()

    # Sort the input on the metric
    z = T.argsort(m, axis=0)
    out = x[z] + 0 * T.sum(m)

    sort = theano.function([m, x], [out])

    # Fix the gradient of the sort operation to be the sum of
    #   the gradients with respect to the input features
    def grad_edit(inps, grads):
        m, x = inps
        g, = grads

        z = T.argsort(m, axis=0)
        s = T.sum(g, axis=-1)

        am = T.max(abs(s), axis=-1)

        s = 10 * (s - T.clip(s, -.90 * am, .90 * am))

        out = s
        return out, g

    op = theano.OpFromGraph([m, x], [out])
    op.grad = grad_edit

    results, updates = theano.map(fn=op, sequences=[met, X], name='batch_sort')
    # ---------END Batch Loop-----------------

    r = results
    return r
Example #33
 def attend(self, y_p):
   updates = self.default_updates()
   for g in range(self.attrs['glimpse']):
     for i in range(len(self.base)-1,-1,-1):
       factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1
       B, C, I, h_p, _ = self.get(y_p, i, g)
       if i == len(self.base) - 1:
         z_i = self.distance(C, h_p)
       else:
         length = T.cast(T.max(T.sum(I,axis=0))+1,'int32')
         ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32')
         def pick(i_t, ext):
           pad = T.minimum(i_t+ext, B.shape[0]) - ext
           return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0)
         idx, _ = theano.map(pick, sequences = [pos/factor], non_sequences = [ext])
         idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero()
         C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2]))
         z_i = self.distance(C, h_p)
         I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1]))
       if i > 0:
         pos = T.argmax(self.softmax(z_i,I),axis=0) * factor
         ext = factor
       else:
         w_i = self.softmax(z_i,I)
     B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2]))
     proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0)
     for i in range(len(self.base)):
       self.glimpses[i].append(proto)
   return T.dot(proto, self.custom_vars['W_att_in_0']), updates
Example #34
    def cross_cpu(self, entities):
        n, m = entities.shape
        pop = T.reshape(entities, (2, n * m / 2))

        if self.fast_rng is None:
            xpoints = self.rng.random_integers(size=(n / 2, ),
                                               low=0,
                                               high=m - 1)
        else:
            xpoints = self.fast_rng.uniform(size=(n / 2, ), low=0, high=m - 1)
            xpoints = xpoints.astype('int32')

        def choice_vector(xpoint, nbits):
            return T.concatenate([
                T.zeros((xpoint, ), dtype='uint8'),
                T.ones((nbits - xpoint, ), dtype='uint8')
            ])

        values, updates = theano.map(fn=choice_vector,
                                     sequences=[xpoints],
                                     non_sequences=[m],
                                     name='choice_vector_building')

        a = T.reshape(values, (n * m / 2, ))
        pop = T.concatenate([T.choose(a, pop), T.choose(1 - a, pop)])
        pop = T.reshape(pop, (n, m))
        return pop
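The mask-building map above in isolation (a sketch with assumed inputs): each step emits one 0/1 crossover mask that switches from zeros to ones at its crossover point.

import numpy as np
import theano
import theano.tensor as T

xpoints = T.ivector('xpoints')
nbits = T.iscalar('nbits')

def choice_vector(xpoint, nbits):
    return T.concatenate([T.zeros((xpoint,), dtype='uint8'),
                          T.ones((nbits - xpoint,), dtype='uint8')])

masks, _ = theano.map(choice_vector, sequences=[xpoints], non_sequences=[nbits])
f = theano.function([xpoints, nbits], masks)
print(f(np.array([1, 3], dtype='int32'), np.int32(5)))
# [[0 1 1 1 1]
#  [0 0 0 1 1]]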
Example #35
def theano_scan_color(writer, draw_fn):
	with writer as writer_buf:
		writer_buf_reshaped = writer_buf.reshape((Screen.screen_vane_count, Screen.screen_max_magnitude, 3))
		vane_matrix = [[[float(vane), float(vane), float(vane)] for px in range(Screen.screen_max_magnitude)]
					   for vane in range(Screen.screen_vane_count)]
		px_matrix =   [[[float(px),float(px),float(px)] for px in range(Screen.screen_max_magnitude)]
					   for vane in range(Screen.screen_vane_count)]
		col_matrix =  [[[float(0), float(1), float(2)] for px in range(Screen.screen_max_magnitude)]
					   for vane in range(Screen.screen_vane_count)]
		vane_vec = T.as_tensor(vane_matrix)
		px_vec = T.as_tensor(px_matrix)
		col_vec = T.as_tensor(col_matrix)
		step = T.fscalar('step')

		draw_fn_with_step = draw_fn(step)
		f, _ = theano.map(draw_fn_with_step, [vane_vec, px_vec, col_vec])

		fn_actual = theano.function([step], f, allow_input_downcast=True, on_unused_input='ignore')

		step_actual = 0
		while True:
			writer.frame_ready()
			start = time.time()
			writer_buf_reshaped[:] = fn_actual(step_actual)
			step_actual -= 1
			done = time.time()
			fps = 1.0/(done - start)
			if fps < TARGET_FPS:
				logging.warning('Frame rate is %f, which is lower than target %d', fps, TARGET_FPS)
Example #36
    def compute_tree(self, emb_x, tree):
        self.recursive_unit = self.create_recursive_unit()
        self.leaf_unit = self.create_leaf_unit()
        num_nodes = tree.shape[0]  # num internal nodes
        num_leaves = self.num_words - num_nodes

        # compute leaf hidden states
        leaf_h, _ = theano.map(
            fn=self.leaf_unit,
            sequences=[emb_x[:num_leaves]])

        # use recurrence to compute internal node hidden states
        def _recurrence(cur_emb, node_info, t, node_h, last_h):
            child_exists = node_info > -1
            child_h = node_h[node_info - child_exists * t] * child_exists.dimshuffle(0, 'x')
            parent_h = self.recursive_unit(cur_emb, child_h, child_exists)
            node_h = T.concatenate([node_h,
                                    parent_h.reshape([1, self.hidden_dim])])
            return node_h[1:], parent_h

        dummy = theano.shared(self.init_vector([self.hidden_dim]))
        (_, parent_h), _ = theano.scan(
            fn=_recurrence,
            outputs_info=[leaf_h, dummy],
            sequences=[emb_x[num_leaves:], tree, T.arange(num_nodes)],
            n_steps=num_nodes)

        return T.concatenate([leaf_h, parent_h], axis=0)
Example #37
 def attend(self, y_p):
   updates = self.default_updates()
   for g in range(self.attrs['glimpse']):
     for i in range(len(self.base)-1,-1,-1):
       factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1
       B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g)
       if i == len(self.base) - 1:
         z_i = self.distance(C, H)
       else:
         length = T.cast(T.max(T.sum(I,axis=0))+1,'int32')
         ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32')
         def pick(i_t, ext):
           pad = T.minimum(i_t+ext, B.shape[0]) - ext
           return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0)
         idx, _ = theano.map(pick, sequences = [pos/factor], non_sequences = [ext])
         idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero()
         C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2]))
         z_i = self.distance(C, H)
         I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1]))
       if i > 0:
         pos = T.argmax(self.softmax(z_i,I),axis=0) * factor
         ext = factor
       else:
         w_i = self.softmax(z_i,I)
     B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2]))
     proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0)
     for i in range(len(self.base)):
       self.glimpses[i].append(proto)
   return T.dot(proto, self.custom_vars['W_att_in_0']), updates
Example #38
    def get_reward_sequences(self, env_state_sessions, agent_action_sessions):
        """
        Computes the rewards given to the agent at each time step for each batch.
        parameters:
            env_state_seq - environment state [batch_i,seq_i,state_units] history for all sessions
            agent_action_seq - int[batch_i,seq_i]
        returns:
            rewards float[batch_i,seq_i] - what reward was given to an agent for corresponding action from state in that batch

        """
        env_state_sessions = check_list(env_state_sessions)
        n_states = len(env_state_sessions)
        agent_action_sessions = check_list(agent_action_sessions)
        n_actions = len(agent_action_sessions)

        def compute_reward(batch_i, *args):
            session_states, session_actions = unpack_list(
                args, [n_states, n_actions])
            return self.get_reward(session_states, session_actions, batch_i)

        sequences = [
            T.arange(agent_action_sessions[0].shape[0], ),
        ] + env_state_sessions + agent_action_sessions

        rewards, updates = theano.map(compute_reward, sequences=sequences)

        assert len(updates) == 0
        return rewards.reshape(
            agent_action_sessions[0].shape)  # reshape back to original
Example #41
 def _score_ao_tg(self, tag_ids, word_ids):
     output, _ = theano.map(fn=order0_ll_score_given_word_and_tag, 
                            sequences=[tag_ids, word_ids], 
                            non_sequences=[self._tg_lp_tag_np_table,
                                           self._tg_tag_emb,
                                           self._tg_word_emb],
                            name="_score_ao_tg")
     return T.sum(output)
Example #42
def hessian_diag1(f, v):
    g = gradient1(f, v)
    idx = tt.arange(g.shape[0], dtype='int32')

    def hess_ii(i):
        return gradient1(g[i], v)[i]

    return theano.map(hess_ii, idx)[0]
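A standalone sketch of the same idea, with plain tt.grad standing in for the project's gradient1 helper:

import numpy as np
import theano
import theano.tensor as tt

v = tt.vector('v')
g = tt.grad(tt.sum(v ** 3), v)
idx = tt.arange(g.shape[0], dtype='int32')
hess_diag, _ = theano.map(lambda i: tt.grad(g[i], v)[i], idx)
f = theano.function([v], hess_diag)
print(f(np.array([1.0, 2.0], dtype=theano.config.floatX)))  # [ 6. 12.]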
Example #43
    def call(self, inputs, mask=None):
        l1 = inputs[0]
        l2 = inputs[1]

        def f(i, l1, l2):
            return T.clip(T.batched_tensordot(l1[i], l2[i], 1), FLOAT_MIN, FLOAT_MAX).astype(FLOATX)

        return theano.map(f, T.arange(l1.shape[0]), non_sequences=[l1, l2])[0]
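The index-through-non_sequences pattern above in isolation (a sketch with assumed shapes, leaving out the project's clipping constants):

import theano
import theano.tensor as T

l1 = T.tensor4('l1')   # (outer, batch, a, b)
l2 = T.tensor4('l2')   # (outer, batch, b, c)
out, _ = theano.map(lambda i, a, b: T.batched_tensordot(a[i], b[i], 1),
                    T.arange(l1.shape[0]), non_sequences=[l1, l2])
f = theano.function([l1, l2], out)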
Exemplo n.º 45
0
 def __call__(self, X):
     # look up one embedding row per index in X, then stack the results
     stk = theano.map(lambda x: self.W[x], X)
     out = T.stacklists(stk[0])
     #return out.dimshuffle('x','x',0,1)
     return out
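# Note (not from the original snippet): theano.map is unnecessary for this kind
# of lookup; advanced indexing produces the same stacked result in a single op:
#     out = self.W[X]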
Exemplo n.º 46
0
 def sym_histograms(self, X):
     """
     Encodes a set of objects (X is a tensor3)
     :param X: tensor3 containing the feature vectors for each object
     :return: matrix of histograms, one row per object
     """
     histograms, updates = theano.map(self.sym_histogram, X)
     return histograms
Exemplo n.º 47
0
    def __init__(
        self,
        rng,
        input,
        vocab_size,
        embed_dm,
        embeddings=None,
    ):
        """
        input: theano.tensor.dmatrix, (number of instances, number of words per sentence)
        
        vocab_size: integer, the size of vocabulary,

        embed_dm: integer, the dimension of word vector representation

        embeddings: theano.tensor.TensorType
        pretrained embeddings
        """
        if embeddings is not None:
            print "Use pretrained embeddings: ON"
            assert embeddings.get_value().shape == (
                vocab_size,
                embed_dm), "%r != %r" % (embeddings.get_value().shape,
                                         (vocab_size, embed_dm))

            self.embeddings = embeddings
        else:
            print "Use pretrained embeddings: OFF"
            embedding_val = np.asarray(rng.normal(0,
                                                  0.05,
                                                  size=(vocab_size, embed_dm)),
                                       dtype=theano.config.floatX)

            embedding_val[vocab_size - 1, :] = 0  # the <PADDING> character is initialized to 0

            self.embeddings = theano.shared(np.asarray(
                embedding_val, dtype=theano.config.floatX),
                                            borrow=True,
                                            name='embeddings')

        self.params = [self.embeddings]

        self.param_shapes = [(vocab_size, embed_dm)]

        # Return:

        # :type, theano.tensor.tensor4
        # :param, dimension (number of instances, 1, word embedding dimension,
        #         number of words in sentence); made 4D to fit the dimension
        #         of the convolution operation
        sent_embedding_list, updates = theano.map(
            lambda sent: self.embeddings[sent], input)
        sent_embedding_tensor = T.stacklists(
            sent_embedding_list)  # make it into a 3D tensor

        self.output = sent_embedding_tensor.dimshuffle(
            0, 'x', 2, 1)  # make it a 4D tensor
Exemplo n.º 48
0
def jacobian1(f, v):
    """jacobian of f wrt v"""
    f = tt.flatten(f)
    idx = tt.arange(f.shape[0], dtype='int32')

    def grad_i(i):
        return gradient1(f[i], v)

    return theano.map(grad_i, idx)[0]
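# Aside (not from the original): usage sketch, assuming the same gradient1 helper
# sketched earlier; the Jacobian of the elementwise square is 2 * diag(v):
import theano
import theano.tensor as tt

v = tt.vector('v')
jac_fn = theano.function([v], jacobian1(v ** 2, v))
# jac_fn([1., 2.]) -> [[2., 0.], [0., 4.]]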
Exemplo n.º 49
0
	def _pv_function1(self, tensor_left, tensor_right, pf_matrix, vf_matrix):
		results, updates = theano.map(
				fn = lambda i, tensor_left, tensor_right, pf_matrix, vf_matrix: self.get_new_p(tensor_left, tensor_right, i, pf_matrix, vf_matrix),
				sequences=[T.arange(tensor_left, tensor_right)],
				non_sequences = [tensor_left, tensor_right, pf_matrix, vf_matrix],
				name = 'pv function'
				)
		max_pf, index = T.max_and_argmax(results, axis=0)
		return [index + tensor_left, max_pf, self.get_new_v(tensor_left, tensor_right, index + tensor_left, vf_matrix)]
Exemplo n.º 50
0
    def call(self, inputs, mask=None):
        def f(i, embedding, text_input):
            mask = T.neq(text_input[i], 0).astype(FLOATX)
            vec = T.dot(mask, embedding[i])
            vec /= T.maximum(vec.norm(2, 0), K.epsilon())

            return T.dot(vec, self.W) + self.b

        return theano.map(f, T.arange(inputs[0].shape[0]), non_sequences=inputs)[0]
Exemplo n.º 51
0
 def read(self, img, center_x, center_y):
     loc_x, loc_y = center_x, center_y
     if img.ndim == 2:
         img = self.matrix2tensor4(img)
     batch_size = img.shape[0]
     img_paded = self.padding_img(img)
     retina, _ = theano.map(self.do_glimpes, sequences=[img_paded, loc_x,
                                                        loc_y])
     return retina.reshape((batch_size, self.get_dim('glimpse')))
Exemplo n.º 53
0
Arquivo: ctc.py Projeto: choko/ctc
def compute_cost_with_cross_entropy_in_parallel(original_rnn_outputs, labels, x_ends, y_ends):
	mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)), T.eq(labels, shift_matrix(labels, 2))))
	arange = T.arange(labels.shape[1])

	initial_state = T.log(T.zeros_like(labels))
	initial_state = T.set_subtensor(initial_state[:,0], 0)

	def select_probabilities(rnn_outputs, label):
		return rnn_outputs[:,label]	

	rnn_outputs, _ = theano.map(select_probabilities, [original_rnn_outputs, labels])
	rnn_outputs = T.log(rnn_outputs.dimshuffle((1,0,2)))

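	# forward pass: a Viterbi-style recursion over the three CTC transitions
	# (stay on the same label, advance by 1, or skip by 2 where `mask` allows,
	# i.e. not into a blank or a repeated label); backlinks record the argmax source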
	def forward_step(probabilities, last_probabilities):
		all_forward_probabilities = T.stack(
			last_probabilities + probabilities,
			log_shift_matrix(last_probabilities, 1) + probabilities,
			log_shift_matrix(last_probabilities, 2) + probabilities + mask,
		)

		max_probability, backlink = T.max_and_argmax(all_forward_probabilities, 0)
		backlink = arange - backlink
		return max_probability, backlink

	results, _ = theano.scan(fn = forward_step, sequences = rnn_outputs, outputs_info = [initial_state, None])
	forward_probabilities, backward_pointers = results

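	# backward pass: from the best final state, follow the stored backlinks to
	# recover the per-frame alignment, then score it with summed cross-entropy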
	def compute_cost(rnn_outputs, forward_probabilities, backward_pointers, x_end, y_end, label):
		def backward_step(backlinks, position):
			new_position = backlinks[position]
			return new_position, position

		initial_state = T.argmax(forward_probabilities[x_end-1,y_end-2:y_end]) + y_end - 2

		results, _ = theano.scan(fn = backward_step, sequences = backward_pointers[0:x_end,:], outputs_info = [initial_state, None], go_backwards = True)
		alignment = label[results[1][::-1]]

		return aggregate(categorical_crossentropy(rnn_outputs[0:x_end], alignment), mode='sum')

	forward_probabilities = forward_probabilities.dimshuffle((1,0,2))
	backward_pointers = backward_pointers.dimshuffle((1,0,2))

	return theano.map(compute_cost, [original_rnn_outputs, forward_probabilities, backward_pointers, x_ends, y_ends, labels])[0]
Exemplo n.º 54
0
    def __init__(self, rng, 
                 input,
                 vocab_size, 
                 embed_dm, 
                 embeddings = None,
    ):
        """
        input: theano.tensor.dmatrix, (number of instances, number of words per sentence)
        
        vocab_size: integer, the size of vocabulary,

        embed_dm: integer, the dimension of word vector representation

        embeddings: theano.tensor.TensorType
        pretrained embeddings
        """                
        if embeddings is not None:
            print "Use pretrained embeddings: ON"
            assert embeddings.get_value().shape == (vocab_size, embed_dm), "%r != %r" %(
                embeddings.get_value().shape, 
                (vocab_size, embed_dm)
            )
            
            self.embeddings = embeddings
        else:
            print "Use pretrained embeddings: OFF"
            embedding_val = np.asarray(
                rng.normal(0, 0.05, size = (vocab_size, embed_dm)), 
                dtype = theano.config.floatX
            )
            
            embedding_val[vocab_size-1,:] = 0 # the <PADDING> character is initialized to 0
            
            self.embeddings = theano.shared(
                np.asarray(embedding_val, 
                           dtype = theano.config.floatX),
                borrow = True,
                name = 'embeddings'
            )

        
        self.params = [self.embeddings]
        
        self.param_shapes = [(vocab_size, embed_dm)]
        
        # Return:
        
        # :type, theano.tensor.tensor4
        # :param, dimension (number of instances, 1, word embedding dimension,
        #         number of words in sentence); made 4D to fit the dimension
        #         of the convolution operation
        sent_embedding_list, updates = theano.map(lambda sent: self.embeddings[sent], 
                                                  input)
        sent_embedding_tensor = T.stacklists(sent_embedding_list) # make it into a 3D tensor
        
        self.output = sent_embedding_tensor.dimshuffle(0, 'x', 2, 1) # make it a 4D tensor
Exemplo n.º 55
0
	def pv_function(self, tensor_input):
		indexf_matrix = theano.shared(
									np.ones(
										(self.max_length, self.max_length), 
										dtype=theano.config.floatX
										),
									name = 'indexf_matrix',
									borrow=True
									)
		pf_matrix = theano.shared(
								np.eye(
										self.max_length, 
										dtype=theano.config.floatX
										),
								name = 'pf_matrix',
								borrow=True
								)
		vf_matrix = theano.shared(
								np.zeros(
										(self.max_length, self.max_length, self.size), 
										dtype=theano.config.floatX
										),
								name = 'vf_matrix',
								borrow=True
								)

		results, updates = theano.reduce(
				fn = lambda i, t_vf_matrix, L, t_tensor_input: T.set_subtensor(t_vf_matrix[i, i], L[t_tensor_input[i]]),
				outputs_info = vf_matrix,
				sequences=[T.arange(tensor_input.shape[0])],
				non_sequences=[self.L, tensor_input],
				name = 'vf_matrix prepare'
				)
		vf_matrix = results
		
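		# fill the chart one diagonal (span length i) at a time; this Python loop
		# is unrolled into the symbolic graph at construction time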
		for i in range(1,self.max_length):
			'''
			for j in range(self.max_length-i):
				new_index, new_pf, new_vf = self._pv_function1(j, j+i, pf_matrix, vf_matrix)
				indexf_matrix = T.set_subtensor(indexf_matrix[j, j+i], new_index)
				pf_matrix = T.set_subtensor(pf_matrix[j, j+i], new_pf)
				vf_matrix = T.set_subtensor(vf_matrix[j, j+i], new_vf)
			'''
			results, updates = theano.map(
				fn = lambda j, pf_matrix, vf_matrix, i: self._pv_function1(j, j+i, pf_matrix, vf_matrix),
				sequences=[T.arange(self.max_length-i)],
				non_sequences = [pf_matrix, vf_matrix, i],
				#name = 'pv function'
				)
			for j in range(self.max_length-i):
				indexf_matrix = T.set_subtensor(indexf_matrix[j, j+i], results[0][j])
				pf_matrix = T.set_subtensor(pf_matrix[j, j+i], results[1][j])
				vf_matrix = T.set_subtensor(vf_matrix[j, j+i], results[2][j])
			
		return indexf_matrix, pf_matrix, vf_matrix
Exemplo n.º 56
0
def make_gradlogps(mdp,agent):
    o = TT.matrix("o",mdp.output_dtype("o"))
    b = TT.matrix("b",agent.output_dtype("b"))
    newa = agent.ponder({"o":o})["a"]
    logp_n = agent.cpd().logliks(newa, b)
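    # per-example gradients: clone the loss graph restricted to row i so that
    # TT.grad sees a scalar, then map the cloned gradient over all rows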
    def onegrad(i):
        logp1 = theano.clone(logp_n, replace = {b:b[i:i+1],o:o[i:i+1]})[0]
        return symbolic.flatten(TT.grad(logp1, agent.policy_vars()))
    gradlogps,_ = theano.map(onegrad, TT.arange(logp_n.shape[0]))
    Glf.ftheano_gradlogp = theano.function([o,b],gradlogps)
    Glf.f_gradlogp = staticmethod(lambda : G.pool.gather(gradlogpmapper,np.concatenate,None))
Exemplo n.º 57
0
def order0_ll_score_given_word_only(tg_word_id,
                                    tg_lp_tag_np_table, tg_tag_emb,
                                    tg_word_emb, num_tag):
    # Return the total log-likelihood for the word, marginalized over all
    # tags via log-sum-exp
    return log_sum_exp(theano.map(fn=order0_ll_score_given_word_and_tag,
                                  sequences=[T.arange(num_tag)],
                                  non_sequences=[tg_word_id,
                                                 tg_lp_tag_np_table,
                                                 tg_tag_emb,
                                                 tg_word_emb],
                                  name="order0_ll_score_map")[0])
Exemplo n.º 58
0
	def _pv_function(self, tensor_left, length, pf_matrix, vf_matrix):
		tensor_right = tensor_left + length
		results, updates = theano.map(
				fn = self.get_new_p,
				sequences=[T.arange(tensor_left, tensor_right)],
				non_sequences = [tensor_left, tensor_right, pf_matrix, vf_matrix],
				name = 'pv function'
				)
		max_pf, index = T.max_and_argmax(results, axis=0)
		max_vf = self.get_new_v(tensor_left, tensor_right, index + tensor_left, vf_matrix)
		return [index + tensor_left, max_pf, max_vf]