Example #1
    def grad(self, inp, grads):
        x, dy, scale, x_mean, x_invstd, epsilon = inp
        ddinputs, ddscale, ddbias = grads

        x_diff = x - x_mean
        mean_dy_x_diff = T.mean(dy * x_diff, axis=self.axes, keepdims=True)

        # compute gradients given each of the output gradients
        g_wrt_x = 0
        g_wrt_dy = 0
        g_wrt_scale = 0
        g_wrt_x_mean = 0
        g_wrt_x_invstd = 0

        if not isinstance(ddinputs.type, theano.gradient.DisconnectedType):
            ccc = scale * (ddinputs -
                           T.mean(ddinputs, axis=self.axes, keepdims=True))
            ddd = (x_invstd**3) * (
                ccc * T.mean(dy * x_diff, axis=self.axes, keepdims=True) +
                dy * T.mean(ccc * x_diff, axis=self.axes, keepdims=True))

            g_wrt_x = g_wrt_x - ddd
            g_wrt_dy = g_wrt_dy + ((ccc * x_invstd) - (
                (x_invstd**3) * x_diff *
                T.mean(ccc * x_diff, axis=self.axes, keepdims=True)))

            eee = (dy * x_invstd) - ((x_invstd**3) * x_diff * mean_dy_x_diff)
            g_wrt_scale = g_wrt_scale + T.sum(
                ddinputs * (eee - T.mean(eee, axis=self.axes, keepdims=True)),
                axis=self.axes,
                keepdims=True)

            g_wrt_x_mean = g_wrt_x_mean + T.sum(
                ddd, axis=self.axes, keepdims=True)
            g_wrt_x_invstd = g_wrt_x_invstd + T.sum(
                ccc * (dy - 3 * (x_invstd**2) * x_diff * mean_dy_x_diff),
                axis=self.axes,
                keepdims=True)

        if not isinstance(ddscale.type, theano.gradient.DisconnectedType):
            g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
            g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
            g_wrt_x_mean = g_wrt_x_mean - (
                x_invstd * ddscale * T.sum(dy, axis=self.axes, keepdims=True))
            g_wrt_x_invstd = g_wrt_x_invstd + (
                ddscale * T.sum(dy * x_diff, axis=self.axes, keepdims=True))

        if not isinstance(ddbias.type, theano.gradient.DisconnectedType):
            g_wrt_dy = g_wrt_dy + T.fill(dy, ddbias)

        # depending on which output gradients are given,
        # some inputs should be disconnected
        results = [
            g_wrt_x, g_wrt_dy, g_wrt_scale, g_wrt_x_mean, g_wrt_x_invstd,
            theano.gradient.DisconnectedType()()
        ]
        return [
            # the integer 0 placeholders mark gradients that were never filled in
            theano.gradient.DisconnectedType()() if isinstance(r, int) else r
            for r in results
        ]
Example #2
def local_abstract_batch_norm_train_grad(node):
    if not isinstance(node.op, AbstractBatchNormTrainGrad):
        return None

    x, dy, scale, x_mean, x_invstd, epsilon = node.inputs
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if not isinstance(x.type, TensorType) or \
       not isinstance(dy.type, TensorType) or \
       not isinstance(scale.type, TensorType) or \
       not isinstance(x_mean.type, TensorType) or \
       not isinstance(x_invstd.type, TensorType) or \
       not isinstance(epsilon.type, TensorType):
        return None

    x_diff = x - x_mean
    mean_dy_x_diff = T.mean(dy * x_diff, axis=axes, keepdims=True)
    c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd ** 3))

    g_wrt_inputs = scale * (c - T.mean(c, axis=axes, keepdims=True))
    g_wrt_scale = T.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
    g_wrt_bias = T.sum(dy, axis=axes, keepdims=True)
    results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias]

    results = [T.patternbroadcast(r, r_orig.broadcastable)
               for (r, r_orig) in zip(results, node.outputs)]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
Example #3
    def grad(self, inp, grads):
        dy, sm = inp
        g, = grads

        tmp = g + tensor.neg(tensor.sum(g * sm, axis=1).dimshuffle((0, 'x')))
        g_dy = tmp * sm

        tmp2 = tensor.sum(dy * sm, axis=1).dimshuffle((0, 'x'))
        g_sm = tmp * dy - g * tmp2

        return g_dy, g_sm
Example #4
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
        WShape = W.shape
        dCdW = convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        dCdd = None  #not differentiable, since d is not continuous
        dCdRShape = None  #not differentiable, since RShape is not continuous

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdW.name = 'ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name + ',W=' + W_name
        dCdb.name = 'ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name + ',W=' + W_name + ',b=' + b_name
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
Example #5
def hierarchical_categorical_crossentropy(coding_dist, true_dist, hierarchy,
                                          inv_hierarchy, level_list):
    """
    Return the cross-entropy between an approximating distribution and a true
    distribution taking into account a hierarchy of classes
    Mathematically it is defined as follows:
    .. math::
        H(p,q) = - \sum_x p(x) \log(q(x))
    Parameters
    ----------
    coding_dist : a dense matrix
        Each slice along axis represents one distribution.
    true_dist : a dense matrix or sparse matrix or integer vector
        In the case of a matrix argument, each slice along axis represents one
        distribution. In the case of an integer vector argument, each element
        represents the position of the '1' in a 1-of-N encoding.
    Returns
    -------
    tensor of rank one-less-than `coding_dist`
        The cross entropy between each coding and true distribution.
    Notes
    -----
    axis : int
        The dimension over which each distribution runs
        (1 for row distributions, 0 for column distributions).
    """
    if true_dist.ndim == coding_dist.ndim:
        return -tensor.sum(true_dist * tensor.log(coding_dist),
                           axis=coding_dist.ndim - 1)
    elif true_dist.ndim == coding_dist.ndim - 1:
        return hierarchical_categorical_crossentropy_1hot(
            coding_dist, true_dist, hierarchy, inv_hierarchy, level_list)
    else:
        raise TypeError('rank mismatch between coding and true distributions')
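As a sanity check on the dense-matrix branch above, here is a minimal NumPy sketch of the quantity it builds symbolically, H(p, q) = -sum_x p(x) log q(x) evaluated row by row; the arrays `p` and `q` are made-up illustrations, not part of the original code.

import numpy as np

# hypothetical 2-row example: each row of q approximates the matching row of p
p = np.array([[1.0, 0.0, 0.0],
              [0.0, 0.5, 0.5]])   # true distributions
q = np.array([[0.8, 0.1, 0.1],
              [0.1, 0.6, 0.3]])   # coding (predicted) distributions

# H(p, q) = -sum_x p(x) * log(q(x)) along the last axis, mirroring
# -tensor.sum(true_dist * tensor.log(coding_dist), axis=coding_dist.ndim - 1)
cross_entropy = -np.sum(p * np.log(q), axis=-1)
print(cross_entropy)  # one value per row, i.e. rank one less than the inputs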
Example #6
def Generate_theta_p(x, s, I, N, K, prod_f):
    #print(s,N,len(x))
    T = len(x)
    D = len(x[0])
    s = [one_hot(ss, N) for ss in s]
    # x = [[xt for _ in range(N)] for xt in x]
    x = np.array(x)
    s = np.array(s)

    model = pm.Model()
    with model:
        # Priors for unknown model parameters
        theta = pm.Normal("theta", mu=0, sigma=1, shape=(D, K)) / np.sqrt(
            K * D)
        p_list = []
        for t in range(T):
            #print(prod_f)
            #print(np.transpose(theta))
            wt = dot(dot(prod_f, transpose(theta)), x[t])
            swt = s[t] * wt
            sum_sw = sum(swt)
            p = exp(swt) / (1 + sum_sw)
            p0 = 1 / (1 + sum_sw)
            p_list.append(concatenate(([p0], p)))

        I_obs = pm.Categorical("I_obs", p=stack(p_list, axis=0), observed=I)

    with model:
        step = pm.Metropolis()
        trace1 = pm.sample(tune=2000, chains=1, step=step)

    return trace1["theta"][-1]
Example #7
    def grad(self, inp, grads):
        x, dy, scale, x_mean, x_invstd, epsilon = inp
        ddinputs, ddscale, ddbias = grads

        x_diff = x - x_mean
        mean_dy_x_diff = T.mean(dy * x_diff, axis=self.axes, keepdims=True)

        # compute gradients given each of the output gradients
        g_wrt_x = 0
        g_wrt_dy = 0
        g_wrt_scale = 0
        g_wrt_x_mean = 0
        g_wrt_x_invstd = 0

        if not isinstance(ddinputs.type, theano.gradient.DisconnectedType):
            ccc = scale * (ddinputs - T.mean(ddinputs, axis=self.axes, keepdims=True))
            ddd = (x_invstd ** 3) * (ccc * T.mean(dy * x_diff, axis=self.axes, keepdims=True) +
                                     dy * T.mean(ccc * x_diff, axis=self.axes, keepdims=True))

            g_wrt_x = g_wrt_x - ddd
            g_wrt_dy = g_wrt_dy + ((ccc * x_invstd) -
                                   ((x_invstd ** 3) * x_diff *
                                    T.mean(ccc * x_diff, axis=self.axes, keepdims=True)))

            eee = (dy * x_invstd) - ((x_invstd ** 3) * x_diff * mean_dy_x_diff)
            g_wrt_scale = g_wrt_scale + T.sum(ddinputs * (eee - T.mean(eee, axis=self.axes, keepdims=True)),
                                              axis=self.axes, keepdims=True)

            g_wrt_x_mean = g_wrt_x_mean + T.sum(ddd, axis=self.axes, keepdims=True)
            g_wrt_x_invstd = g_wrt_x_invstd + T.sum(ccc * (dy - 3 * (x_invstd ** 2) * x_diff * mean_dy_x_diff),
                                                    axis=self.axes, keepdims=True)

        if not isinstance(ddscale.type, theano.gradient.DisconnectedType):
            g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
            g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
            g_wrt_x_mean = g_wrt_x_mean - (x_invstd * ddscale * T.sum(dy, axis=self.axes, keepdims=True))
            g_wrt_x_invstd = g_wrt_x_invstd + (ddscale * T.sum(dy * x_diff, axis=self.axes, keepdims=True))

        if not isinstance(ddbias.type, theano.gradient.DisconnectedType):
            g_wrt_dy = g_wrt_dy + T.fill(dy, ddbias)

        # depending on which output gradients are given,
        # some inputs should be disconnected
        results = [g_wrt_x, g_wrt_dy, g_wrt_scale, g_wrt_x_mean, g_wrt_x_invstd,
                   theano.gradient.DisconnectedType()()]
        # the integer 0 placeholders mark gradients that were never filled in
        return [theano.gradient.DisconnectedType()() if isinstance(r, int) else r
                for r in results]
Example #8
def norm(x, ord):
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1. / ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x), 0))
        else:
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
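The `ord` handling above mirrors the conventions of numpy.linalg.norm; the short, purely illustrative NumPy sketch below shows the corresponding reference values (the string orders 'inf'/'-inf' in the snippet correspond to np.inf/-np.inf here).

import numpy as np

v = np.array([3.0, -4.0, 0.0])
M = np.array([[1.0, -2.0],
              [3.0, 4.0]])

# vector norms: default 2-norm, max/min absolute value, count of non-zeros
print(np.linalg.norm(v))           # 5.0, same as tensor.sum(x**2)**0.5
print(np.linalg.norm(v, np.inf))   # 4.0, max(abs(x))
print(np.linalg.norm(v, -np.inf))  # 0.0, min(abs(x))
print(np.linalg.norm(v, 0))        # 2.0, number of non-zero entries

# matrix norms: Frobenius, max/min absolute row sum, max/min absolute column sum
print(np.linalg.norm(M, 'fro'))    # sqrt of the sum of squared entries
print(np.linalg.norm(M, np.inf))   # max over rows of sum(abs(row))
print(np.linalg.norm(M, 1))        # max over columns of sum(abs(col))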
Example #9
def norm(x,ord):
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1./ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x),0))
        else:
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
Example #10
def partially_linear(true_dist, coding_dist):
    loss = 0
    TIME = 150
    N_C = 21
    batch = 32
    for t in range(TIME):
        term1 = true_dist[:, t] * tensor.log(coding_dist[:, t] + 0.0000001)
        term2 = (1 - true_dist[:, t]) * tensor.log(1 - coding_dist[:, t] + 0.0000001)
        loss = loss + np.double(1) / N_C * tensor.sum(term1 + term2 * np.double(t) / TIME, axis=1)

    return -loss / batch
Example #11
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ",["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = theano.tensor.nnet.convTransp3D(W, T.zeros_like(V[0, 0, 0,
                                                                 0, :]), d,
                                               dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = grad_undefined(
            self, 3, inputs[3],
            "The gradient of Conv3D with respect to the convolution"
            " stride is undefined because Conv3D is only defined for"
            " integer strides.")

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon_dCdH'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon_V'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
        dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name + ',W=' +
                     W_name + ')')
        dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name + ',W=' +
                     W_name + ',b=' + b_name + ')')

        return [dCdV, dCdW, dCdb, dCdd]
Example #12
    def functions(self, sequence_length):
        key = (sequence_length)

        if key not in self.cache:
            logging.info("Need to construct graph for sequence_length=%d..." % (sequence_length))

            # creating network input variable nodes
            correct_inputs = t.ftensor3("correct input")
            noise_inputs = t.ftensor3("noise input")
            learning_rate = t.fscalar("learning rate")

            # creating op nodes for firing the network
            correct_score, correct_prehidden = self.score(correct_inputs)
            noise_score, noise_prehidden = self.score(noise_inputs)

            # creating op nodes for the pairwise ranking cost function
            loss = t.clip(1 - correct_score + noise_score, 0, 1e999)
            total_loss = t.sum(loss)

            # the necessary cost function gradients
            parameters_gradient = grad(total_loss, list(self.parameters))
            correct_inputs_gradient = grad(total_loss, correct_inputs)
            noise_inputs_gradient = grad(total_loss, noise_inputs)

            # setting network inputs
            predict_inputs = [correct_inputs]
            train_inputs = [correct_inputs, noise_inputs, learning_rate]
            verbose_predict_inputs = predict_inputs

            # setting network outputs
            predict_outputs = [correct_score]
            train_outputs = [correct_inputs_gradient, noise_inputs_gradient, loss, correct_score, noise_score]
            verbose_predict_outputs = [correct_score, correct_prehidden]

            nnodes = len(theano.gof.graph.ops(predict_inputs, predict_outputs))
            logging.info("About to compile prediction function over %d ops [nodes]..." % nnodes)
            predict = theano.function(predict_inputs, predict_outputs, mode=COMPILE_MODE)
            logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))

            nnodes = len(theano.gof.graph.ops(verbose_predict_inputs, verbose_predict_outputs))
            logging.info("About to compile verbose prediction function over %d ops [nodes]..." % nnodes)
            verbose_predict = theano.function(verbose_predict_inputs, verbose_predict_outputs, mode=COMPILE_MODE)
            logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))

            nnodes = len(theano.gof.graph.ops(train_inputs, train_outputs))
            logging.info("About to compile training function over %d ops [nodes]..." % nnodes)
            train = theano.function(train_inputs, train_outputs, mode=COMPILE_MODE, updates=[(p, p - learning_rate * gp) for p, gp in zip(list(self.parameters), parameters_gradient)])
            logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))

            self.cache[key] = (predict, train, verbose_predict)

        return self.cache[key]
Example #13
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ",["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = theano.tensor.nnet.convTransp3D(
            W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = grad_undefined(
            self, 3, inputs[3],
            "The gradient of Conv3D with respect to the convolution"
            " stride is undefined because Conv3D is only defined for"
            " integer strides.")

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon_dCdH'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon_V'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
        dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ')')
        dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ',b=' + b_name + ')')

        return [dCdV, dCdW, dCdb, dCdd]
Example #14
def acc_weighted_cross_entropy(pred, targets):
    '''
    Loss that only counts misclassified entries.
    '''
    #weights = np.not_equal(pred, targets).astype("float")
    #return -np.sum(weights* targets * tensor.log(pred), axis=pred.ndim - 1)
    if targets.ndim == pred.ndim:
        weights = tensor.neq(pred, targets)
        return -tensor.sum(weights * targets * tensor.log(pred),
                           axis=pred.ndim - 1)
    else:
        print(targets.ndim, " ", pred.ndim)
        raise TypeError('rank mismatch between coding and true distributions')
Example #15
    def grad(self,inputs, output_gradients):
        V,W,b,d = inputs
        dCdH ,= output_gradients
        #make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
        #print dCdH.broadcastable
        #print "dCdH.broadcastable"
        #quit(-1)
        #dCdH = printing.Print("dCdH = ",["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = ConvTransp3D.convTransp3D(W, T.zeros_like(V[0,0,0,0,:]), d, dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = ConvGrad3D.convGrad3D(V,d,WShape,dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0,1,2,3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = None #not differentiable, since d is not continuous

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdV.name = 'Conv3D_dCdV.dCdH='+dCdH_name+',V='+V_name
        dCdW.name = 'Conv3D_dCdW.dCdH='+dCdH_name+',V='+V_name+',W='+W_name
        dCdb.name = 'Conv3D_dCdb.dCdH='+dCdH_name+',V='+V_name+',W='+W_name+',b='+b_name



        return [ dCdV, dCdW, dCdb, dCdd ]
Example #16
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        #make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
        #print dCdH.broadcastable
        #print "dCdH.broadcastable"
        #quit(-1)
        #dCdH = printing.Print("dCdH = ",["shape"])

        dCdV = ConvTransp3D.convTransp3D(W, T.zeros_like(V[0, 0, 0, 0, :]), d,
                                         dCdH, V.shape[1:4])
        WShape = W.shape
        dCdW = ConvGrad3D.convGrad3D(V, d, WShape, dCdH)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdd = None  #not differentiable, since d is not continuous

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdV.name = 'Conv3D_dCdV.dCdH=' + dCdH_name + ',V=' + V_name
        dCdW.name = 'Conv3D_dCdW.dCdH=' + dCdH_name + ',V=' + V_name + ',W=' + W_name
        dCdb.name = 'Conv3D_dCdb.dCdH=' + dCdH_name + ',V=' + V_name + ',W=' + W_name + ',b=' + b_name

        return [dCdV, dCdW, dCdb, dCdd]
Example #17
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0,
                                                                 0, :]), d)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        # not differentiable, since d affects the output elements
        dCdd = grad_undefined(self, 2, d)
        # disconnected, since RShape just determines the output shape
        dCdRShape = DisconnectedType()()

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon_dCdR'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon_H'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
Example #18
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        # not differentiable, since d affects the output elements
        dCdd = grad_undefined(self, 2, d)
        # disconnected, since RShape just determines the output shape
        dCdRShape = DisconnectedType()()

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon_dCdR'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon_H'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
Example #19
    def grad(self,inputs, output_gradients):
        W,b,d,H, RShape = inputs
        dCdR ,= output_gradients
        dCdH = conv3D( dCdR, W, T.zeros_like(H[0,0,0,0,:]), d)
        WShape = W.shape
        dCdW = convGrad3D(dCdR,d,WShape,H)
        dCdb = T.sum(dCdR,axis=(0,1,2,3))
        dCdd = None #not differentiable, since d is not continuous
        dCdRShape = None #not differentiable, since RShape is not continuous


        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'


        dCdW.name = 'ConvTransp3D_dCdW.H='+H_name+',dCdR='+dCdR_name+',W='+W_name
        dCdb.name = 'ConvTransp3D_dCdb.H='+H_name+',dCdR='+dCdR_name+',W='+W_name+',b='+b_name
        dCdH.name = 'ConvTransp3D_dCdH.H='+H_name+',dCdR='+dCdR_name

        return [ dCdW,  dCdb, dCdd, dCdH, dCdRShape ]
Example #20
    def functions(self, sequence_length):
        key = (sequence_length)

        if key not in self.cache:
            logging.info("Need to construct graph for sequence_length=%d..." %
                         (sequence_length))

            # creating network input variable nodes
            correct_inputs = t.ftensor3("correct input")
            noise_inputs = t.ftensor3("noise input")
            learning_rate = t.fscalar("learning rate")

            # creating op nodes for firing the network
            correct_score, correct_prehidden = self.score(correct_inputs)
            noise_score, noise_prehidden = self.score(noise_inputs)

            # creating op nodes for the pairwise ranking cost function
            loss = t.clip(1 - correct_score + noise_score, 0, 1e999)
            total_loss = t.sum(loss)

            # the necessary cost function gradients
            parameters_gradient = grad(total_loss, list(self.parameters))
            correct_inputs_gradient = grad(total_loss, correct_inputs)
            noise_inputs_gradient = grad(total_loss, noise_inputs)

            # setting network inputs
            predict_inputs = [correct_inputs]
            train_inputs = [correct_inputs, noise_inputs, learning_rate]
            verbose_predict_inputs = predict_inputs

            # setting network outputs
            predict_outputs = [correct_score]
            train_outputs = [
                correct_inputs_gradient, noise_inputs_gradient, loss,
                correct_score, noise_score
            ]
            verbose_predict_outputs = [correct_score, correct_prehidden]

            nnodes = len(theano.gof.graph.ops(predict_inputs, predict_outputs))
            logging.info(
                "About to compile prediction function over %d ops [nodes]..." %
                nnodes)
            predict = theano.function(predict_inputs,
                                      predict_outputs,
                                      mode=COMPILE_MODE)
            logging.info("...done constructing graph for sequence_length=%d" %
                         (sequence_length))

            nnodes = len(
                theano.gof.graph.ops(verbose_predict_inputs,
                                     verbose_predict_outputs))
            logging.info(
                "About to compile verbose prediction function over %d ops [nodes]..."
                % nnodes)
            verbose_predict = theano.function(verbose_predict_inputs,
                                              verbose_predict_outputs,
                                              mode=COMPILE_MODE)
            logging.info("...done constructing graph for sequence_length=%d" %
                         (sequence_length))

            nnodes = len(theano.gof.graph.ops(train_inputs, train_outputs))
            logging.info(
                "About to compile training function over %d ops [nodes]..." %
                nnodes)
            train = theano.function(
                train_inputs,
                train_outputs,
                mode=COMPILE_MODE,
                updates=[(p, p - learning_rate * gp) for p, gp in zip(
                    list(self.parameters), parameters_gradient)])
            logging.info("...done constructing graph for sequence_length=%d" %
                         (sequence_length))

            self.cache[key] = (predict, train, verbose_predict)

        return self.cache[key]