示例#1
0
    def criterion(self):
        """Assemble the training loss and the classification-error metric.

        The loss is a margin loss of ``self.length`` against ``self.labels``.
        When ``self.use_reconstruction`` is truthy, a scaled mean-squared
        error between ``self.training_model`` and ``self.features`` is added.

        Returns:
            tuple: ``(total_loss, error)``.
        """
        down_weight = 0.5  # scales the penalty of the (1 - labels) term
        batch_axis = ct.axis.Axis.default_batch_axis()

        # Margin loss: penalise lengths below 0.9 where the label is set and
        # lengths above 0.1 where it is not.
        too_short = ct.reshape(ct.square(ct.relu(0.9 - self.length)), (-1))
        too_long = ct.reshape(ct.square(ct.relu(self.length - 0.1)), (-1))
        per_class = (self.labels * too_short
                     + down_weight * (1 - self.labels) * too_long)
        margin_loss = ct.reduce_mean(ct.reduce_sum(per_class, axis=0),
                                     axis=batch_axis)

        # Classification error of the softmax over the lengths; the reshape
        # hard-codes 10 outputs.
        probabilities = ct.softmax(self.length, axis=0)
        error = ct.classification_error(ct.reshape(probabilities, (10)),
                                        self.labels)

        if not self.use_reconstruction:
            return margin_loss, error

        # Reconstruction term: mean squared difference of the flattened
        # tensors, averaged over the batch axis as well.
        flat_features = ct.reshape(self.features, shape=(-1,))
        flat_decoded = ct.reshape(self.training_model, shape=(-1,))
        reconstruction_err = ct.reduce_mean(
            ct.reduce_mean(ct.square(flat_decoded - flat_features), axis=0),
            axis=batch_axis)
        return margin_loss + (0.0005 * 784) * reconstruction_err, error
示例#2
0
def true_density(z):
    """Return the unnormalised density ``exp(-u(z))`` of a 2-D target.

    ``u`` combines a ring term on the norm of ``(z[0], z[1])`` (radius 4,
    width 0.4) with a two-mode term on ``z[0]`` (modes at -2 and 2, width
    0.8).

    Args:
        z: indexable value; only ``z[0]`` and ``z[1]`` are used.

    Returns:
        CNTK function evaluating ``exp(-u)``.
    """
    first, second = z[0], z[1]
    radius = C.sqrt(C.square(first) + C.square(second))
    mode_at_plus_two = C.exp(-0.5 * C.square((first - 2) / 0.8))
    mode_at_minus_two = C.exp(-0.5 * C.square((first + 2) / 0.8))
    ring_energy = 0.5 * C.square(((radius - 4) / 0.4))
    energy = ring_energy - C.log(mode_at_plus_two + mode_at_minus_two)
    return C.exp(-energy)
示例#3
0
    def model(seq_image, decoded):
        """Extract an ``n`` x ``n`` gaussian-attention glimpse from a sequence image.

        The attention parameters are derived from ``decoded`` through the
        enclosing scope's ``dense`` and ``attention_parameters``; ``n`` and
        ``image_height`` also come from the enclosing scope.

        Args:
            seq_image: sequence tensor; per the shape comments below its
                sequence axis runs along the image width (TODO confirm
                against the caller).
            decoded: tensor fed to ``dense`` to produce the attention
                parameters.

        Returns:
            The attended patch scaled by ``gamma``; per the shape comments
            below, [#, *] [filters, n (x), n (y)].
        """
        params = dense(decoded)
        g_x, g_y, sigma2, delta, gamma = attention_parameters(params)

        # 1-based patch indices used to place the n filter centres.
        i = C.Constant(np.arange(n) + 1, )  # col of patch
        j = C.Constant(np.arange(n) + 1, )  # row of patch
        # Filter centres: grid centre plus a stride-scaled, zero-centred offset.
        mu_x = g_x + (i - n / 2 - 0.5) * delta
        mu_y = g_y + (j - n / 2 - 0.5) * delta
        mu_x = C.expand_dims(mu_x, axis=-1)
        mu_y = C.expand_dims(mu_y, axis=-1)
        # mu_x: [#, *] [n, 1]
        # mu_y: [#, *] [n, 1]

        image = C.sequence.unpack(seq_image,
                                  padding_value=0,
                                  no_mask_output=True)
        # image: [#] [*image_width, filters, image_height]

        width_pos = Cx.sequence.position(seq_image)
        # width_pos: [#, *] [1]

        # Padded positions get a huge index (999_999); presumably so that
        # their gaussian weight below is effectively zero -- TODO confirm.
        width_pos_unpacked = C.sequence.unpack(width_pos,
                                               padding_value=999_999,
                                               no_mask_output=True)
        # width_pos: [#] [*image_width, 1]

        a = C.sequence.broadcast_as(C.swapaxes(width_pos_unpacked), mu_x)
        # a: [#, *] [1, *image_width]
        # x pos index of image (width)

        b = C.Constant(np.arange(image_height).reshape((1, -1)))
        # b: [] [1, image_height]
        # y pos index of image (height)

        # calculate which portion of the image is attended by the gaussian filter
        f_xi = C.exp(-0.5 * C.square(a - mu_x) / sigma2)
        f_yj = C.exp(-0.5 * C.square(b - mu_y) / sigma2)
        # f_xi: [#, *] [n, *image_width]
        # f_yj: [#, *] [n, image_height]

        # Normalisers: sum of each filter over the image axis.
        z_x = C.reduce_sum(f_xi, axis=1)
        z_y = C.reduce_sum(f_yj, axis=1)
        # z_x: [#, *] [n]
        # z_y: [#, *] [n]

        f_xi = f_xi / z_x
        f_yj = f_yj / z_y
        # f_xi: [#, *] [n, *image_width]
        # f_yj: [#, *] [n, image_height]

        # combine filters from x and y
        image_broadcasted = C.sequence.broadcast_as(image, f_yj)
        attended = gamma * C.times(
            f_xi, C.times_transpose(image_broadcasted, f_yj), output_rank=2)
        # attended: [#, *] [n, filters, n]
        attended = C.swapaxes(attended)
        # attended: [#, *] [filters, n (x) , n (y)]
        return attended
def total_variation_loss(x):
    """Return the average squared first difference of ``x`` over two axes.

    A ``[-1, 1]`` difference kernel is convolved (without padding) along the
    last axis and along the second-to-last axis; the result is the mean of
    the two mean-squared responses.

    Args:
        x: CNTK tensor with a static ``shape``.

    Returns:
        CNTK function evaluating the loss.
    """
    diff_taps = np.array([-1, 1], dtype=np.float32)

    # Prepend a singleton axis to the input before convolving.
    expanded = C.reshape(x, (1,) + x.shape)

    horizontal_kernel = C.constant(value=diff_taps.reshape(1, 1, 1, 1, 2))
    vertical_kernel = C.constant(value=diff_taps.reshape(1, 1, 1, 2, 1))

    horizontal_diff = C.convolution(horizontal_kernel, expanded,
                                    auto_padding=[False])
    vertical_diff = C.convolution(vertical_kernel, expanded,
                                  auto_padding=[False])

    vertical_energy = C.reduce_mean(C.square(vertical_diff))
    horizontal_energy = C.reduce_mean(C.square(horizontal_diff))
    return 0.5 * (vertical_energy + horizontal_energy)
示例#5
0
 def create_model(self):
     """Build the dense network, its squared-error criterion and trainer.

     The hidden stack has ``self._num_hidden_layers`` Dense layers whose
     widths come from ``self._hidden_layers``; the output layer is linear.

     Returns:
         tuple: ``(model, loss, learner, trainer)``.
     """
     layer_widths = self._hidden_layers
     with C.layers.default_options(init=C.layers.glorot_uniform(),
                                   activation=C.ops.relu):
         # Hidden layers pick up the Glorot/ReLU defaults set above.
         net = self._input
         for layer_idx in range(self._num_hidden_layers):
             dense = C.layers.Dense(layer_widths[layer_idx])
             net = dense(net)

         # Linear read-out layer.
         model = C.layers.Dense(self._output_size, activation=None)(net)

         # Loss and metric are the same squared error averaged over axis 0.
         loss = C.reduce_mean(C.square(model - self._output), axis=0)
         meas = C.reduce_mean(C.square(model - self._output), axis=0)

         learner = C.adadelta(model.parameters, self._lr_schedule)
         trainer = C.Trainer(model, (loss, meas), learner)
     return model, loss, learner, trainer
示例#6
0
    def __get_trainer_loss(self, model, action_count):
        """Create an SGD trainer that regresses ``model`` onto target values.

        Args:
            model: CNTK function whose output is compared with the targets.
            action_count: dimension of the target sequence input.

        Returns:
            tuple: ``(trainer, loss)``.
        """
        target = C.sequence.input_variable(action_count, np.float32)

        # Mean squared error along axis 0, used as both loss and metric.
        loss = C.reduce_mean(C.square(model - target), axis=0)
        meas = C.reduce_mean(C.square(model - target), axis=0)

        # Plain SGD with a per-minibatch learning rate and gradient clipping.
        schedule = C.learning_rate_schedule(LEARNING_RATE,
                                            C.UnitType.minibatch)
        sgd_learner = C.sgd(model.parameters,
                            schedule,
                            gradient_clipping_threshold_per_sample=10)

        return C.Trainer(model, (loss, meas), sgd_learner), loss
示例#7
0
 def create_model(self):
     """Build the dense network, its squared-error criterion and trainer.

     The hidden stack has ``self._num_hidden_layers`` ReLU Dense layers whose
     widths come from ``self._hidden_layers``; the output layer is linear.
     The AdaDelta learner uses an L2 regularization weight of 0.01.

     Returns:
         tuple: ``(model, loss, learner, trainer)``.
     """
     layer_widths = self._hidden_layers
     with cntk.layers.default_options(init=cntk.layers.glorot_uniform(),
                                      activation=cntk.ops.relu):
         net = self._input
         for layer_idx in range(self._num_hidden_layers):
             dense = cntk.layers.Dense(layer_widths[layer_idx],
                                       activation=cntk.ops.relu)
             net = dense(net)

         # Linear read-out layer.
         model = cntk.layers.Dense(self._output_size, activation=None)(net)

         # Loss and metric are the same squared error averaged over axis 0.
         loss = cntk.reduce_mean(cntk.square(model - self._output), axis=0)
         meas = cntk.reduce_mean(cntk.square(model - self._output), axis=0)

         learner = cntk.adadelta(model.parameters,
                                 self._lr_schedule,
                                 l2_regularization_weight=0.01)
         trainer = cntk.Trainer(model, (loss, meas), learner)
     return model, loss, learner, trainer
示例#8
0
    def window_weight(a, b, k, u):
        """
        Compute phi, the window weight of the character sequence at position u.
        Function tested to be correct on 2018-25-02 using numpy equivalent

        math:
            phi = sum over mixtures of { a * exp ( -b * (k - u) ^ 2 ) }

        Args:
            a: importance of each window within the mixture. Not normalised,
                does not sum to one.
            b: width of the attention window
            k: location of the window
            u: integer position of each item in the sequence, from 1 to
                seq_length. (rank 2 tensor) [-3, 1]

        Returns:
            :class:`~cntk.ops.functions.Function`

        """
        squared_offset = C.square(k - u)
        per_mixture = a * C.exp(-1 * b * squared_offset)
        # Sum out the mixture axis (axis 0), then swap the remaining axes.
        summed = C.reduce_sum(per_mixture, axis=0)
        # result: [#, n] [*-c, 1]
        return C.swapaxes(summed)
示例#9
0
def run_cntk(image_path, model_path):
    """Run gradient ascent on an image to amplify pooling-layer activations.

    The model is loaded from ``model_path`` and its pooling nodes are
    collected. A loss is built from the scaled sum of squared activations of
    the third and fourth pooling nodes found, and the preprocessed image is
    handed to ``gradient_ascent_cntk``.

    Args:
        image_path: path of the image to load with OpenCV.
        model_path: path of the CNTK model file.

    Returns:
        numpy.ndarray: ``uint8`` image in height x width x channel layout.

    Raises:
        OSError: if OpenCV cannot read ``image_path``.
        IndexError: if fewer than four pooling nodes are found.
    """
    import functools
    import cv2

    model = cntk.load_model(model_path)

    # Collect every plain Function whose description mentions 'Pooling'.
    pool_nodes = list()
    for node in cntk.logging.depth_first_search(model, lambda x: True, depth=0):
        if type(node) is cntk.ops.functions.Function:
            if 'Pooling' in str(node):
                pool_nodes.append(node)
                print(node)
    print(pool_nodes)

    # node contributions to the loss metric
    layer_contributions = {
        pool_nodes[2]: 1,
        pool_nodes[3]: 3,
    }

    # Define the loss: per layer, coeff * mean of the squared activations.
    loss = None
    for layer, coeff in layer_contributions.items():
        activation = layer.output
        scaling = functools.reduce(lambda x, y: x * y, activation.shape)
        sum_squares = cntk.reduce_sum(cntk.square(activation))
        scaled_sum_squares = (coeff / scaling) * sum_squares
        if loss is None:
            loss = scaled_sum_squares
        else:
            loss = loss + scaled_sum_squares

    # Swap the model's first argument for an input that carries gradients,
    # freezing all parameters in the clone.
    dream = cntk.input_variable(shape=model.arguments[0].shape,
                                needs_gradient=True,
                                name='features')
    model = cntk.ops.combine(loss).clone(
        cntk.ops.CloneMethod.freeze, substitutions={model.arguments[0]: dream})
    step = 0.1  # Gradient ascent step size
    iterations = 5  # Number of ascent steps per scale

    # Load the image into a Numpy array. cv2.imread signals failure by
    # returning None instead of raising, so check explicitly.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("could not read image: {!r}".format(image_path))
    img = cv2.resize(img, (224, 224))

    # HWC uint8 in [0, 255] -> CHW float32 in [-1, 1].
    img = img.astype(np.float32)
    img = np.transpose(img, (2, 0, 1))
    img /= 127.5
    img -= 1
    img = gradient_ascent_cntk(model, img, iterations=iterations, step=step)

    # Undo the preprocessing: CHW in [-1, 1] -> HWC uint8 in [0, 255].
    img = np.transpose(img, (1, 2, 0))
    img /= 2.
    img += 0.5
    img *= 255.
    img = np.clip(img, 0, 255).astype('uint8')
    return img
示例#10
0
def test_gather_op(device_id, precision):
    """Exercise ``C.gather``: forward values, gradients and memory reuse.

    Covers (1) gathering rows with a (2, 1) index input, (2) the gradient
    with respect to the gathered parameter, (3) indices that depend on a
    learnable parameter (which must receive zero gradient), (4) a (2, 2)
    index input, and (5) a small trained model that checks only the gathered
    rows of the parameter are updated.

    Args:
        device_id: test fixture value; not used in the body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the index dtype.
    """
    index_dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=index_dtype),
              AA([[3], [4]], dtype=index_dtype)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # Pass the array's shape. The original passed ``r_data.data`` (the raw
    # buffer), which only worked because the shape is ignored when ``init``
    # is an ndarray.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Rows 0, 1, 3 and 4 are gathered once each; rows 2 and 5 never.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray([[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]],
                               dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices from a learnable parameter (no gradient should pass
    # through the indices -- zeros should be passed)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a: a_data},
                                                   [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params],
                          np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=index_dtype),
              AA([[2, 4], [3, 5]], dtype=index_dtype)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # the following small model tests the memory reuse issue of the gather node.
    x = C.input_variable((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input_variable((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    # 10 samples, each a (3, 4) index grid that only references rows 1 and 2.
    x_batch = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    y_batch = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: x_batch, y: y_batch})
    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should stay at 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
示例#11
0
def test_gather_op(device_id, precision):
    """Exercise ``C.gather``: forward values, gradients and memory reuse.

    Covers (1) gathering rows with a (2, 1) index input, (2) the gradient
    with respect to the gathered parameter, (3) indices that depend on a
    learnable parameter (which must receive zero gradient), (4) a (2, 2)
    index input, and (5) a small trained model that checks only the gathered
    rows of the parameter are updated.

    Args:
        device_id: test fixture value; not used in the body.
        precision: key into ``PRECISION_TO_TYPE`` selecting the index dtype.
    """
    index_dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=index_dtype),
              AA([[3], [4]], dtype=index_dtype)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # Pass the array's shape. The original passed ``r_data.data`` (the raw
    # buffer), which only worked because the shape is ignored when ``init``
    # is an ndarray.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]],
                           [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Rows 0, 1, 3 and 4 are gathered once each; rows 2 and 5 never.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray([[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]],
                               dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices from a learnable parameter (no gradient should pass
    # through the indices -- zeros should be passed)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a: a_data},
                                                   [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params],
                          np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=index_dtype),
              AA([[2, 4], [3, 5]], dtype=index_dtype)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # the following small model tests the memory reuse issue of the gather node.
    x = C.input_variable((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input_variable((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    # 10 samples, each a (3, 4) index grid that only references rows 1 and 2.
    x_batch = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    y_batch = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: x_batch, y: y_batch})
    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should stay at 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
示例#12
0
    def squash(input):
        """Scale ``input`` by ``||s||^2 / (1 + ||s||^2) / sqrt(||s||^2 + epsilon)``.

        The squared norm is taken along ``axis``; ``axis`` and ``epsilon``
        come from the enclosing scope.

        Args:
            input: CNTK tensor to squash.

        Returns:
            CNTK function with the rescaled tensor.
        """
        # ||Sj||^2
        squared_norm = ct.reduce_sum(ct.square(input), axis=axis)

        # ||Sj||^2 / (1 + ||Sj||^2)
        shrink = ct.element_divide(squared_norm, ct.plus(1, squared_norm))
        # ||Sj||, with epsilon added under the square root
        norm = ct.sqrt(ct.plus(squared_norm, epsilon))

        return ct.element_divide(shrink, norm) * input
示例#13
0
 def __local_response_normalization(self, k, n, alpha, beta, name=''):
     """Build a local-response-normalization function on a placeholder.

     Computes ``x / (k + b) ** beta`` where ``b`` is the convolution of the
     squared input with a constant kernel of value ``alpha / (2 * n + 1)``
     and shape ``(1, 2 * n + 1, 1, 1)``.

     Args:
         k: additive bias inside the denominator.
         n: half-width of the normalization window.
         alpha: scale of the window sum.
         beta: exponent of the denominator.
         name: accepted but not used by this implementation.

     Returns:
         CNTK function with the single placeholder ``lrn_arg``.
     """
     window = 2 * n + 1
     operand = cntk.placeholder(name='lrn_arg')
     squared = cntk.square(operand)
     # Insert a singleton axis so the convolution below does not reduce.
     squared_expanded = cntk.reshape(squared, (1, cntk.InferredDimension), 0, 1)
     kernel = cntk.constant(alpha / window, (1, window, 1, 1), name='W')
     smoothed = cntk.convolution(kernel, squared_expanded)
     # Remove the singleton axis again.
     local_sum = cntk.reshape(smoothed, cntk.InferredDimension, 0, 2)
     # (k + b) ** beta, written as exp(beta * log(k + b)).
     denominator = cntk.exp(beta * cntk.log(k + local_sum))
     return cntk.element_divide(operand, denominator)
示例#14
0
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     """Apply local response normalization to ``x``.

     Computes ``x / (bias + b) ** beta`` where ``b`` is the convolution of
     ``x ** 2`` with a constant kernel of value
     ``alpha / (2 * depth_radius + 1)``. The kernel dtype comes from the
     enclosing scope's ``dtype``.

     Args:
         x: CNTK tensor to normalize.
         depth_radius: half-width of the normalization window.
         bias: additive bias inside the denominator.
         alpha: scale of the window sum.
         beta: exponent of the denominator.
         name: accepted but not used by this implementation.

     Returns:
         CNTK function with the normalized tensor.
     """
     window = 2 * depth_radius + 1
     squared = C.square(x)
     # Insert a fake singleton reduction dimension after the channel axis
     # (Python and BrainScript axis orders are reversed).
     squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
     kernel = C.constant(alpha / window, shape=(1, window, 1, 1),
                         dtype=dtype, name='W')
     # The kernel is non-trivial along one axis only; the fake reduction
     # dimension has size 1, so nothing is reduced.
     smoothed = C.convolution(kernel, squared_expanded)
     # Drop the fake singleton reduction dimension again.
     local_sum = C.reshape(smoothed, C.InferredDimension, 0, 2)
     # (bias + b) ** beta, written as exp(beta * log(bias + b)).
     denominator = C.exp(beta * C.log(bias + local_sum))
     return C.element_divide(x, denominator)
示例#15
0
文件: NIN_test2.py 项目: lizishu/CNTK
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    """Build a local-response-normalization function on a placeholder.

    Computes ``x / (k + b) ** beta`` where ``b`` is the convolution of the
    squared input with a constant kernel of value ``alpha / (2 * n + 1)``
    and shape ``(1, 2 * n + 1, 1, 1)``.

    Args:
        k: additive bias inside the denominator.
        n: half-width of the normalization window.
        alpha: scale of the window sum.
        beta: exponent of the denominator.
        name: accepted but not used by this implementation.

    Returns:
        CNTK function with the single placeholder ``lrn_arg``.
    """
    window = 2 * n + 1
    operand = C.placeholder(name='lrn_arg')
    squared = C.square(operand)
    # Insert a singleton axis so the convolution below does not reduce.
    squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
    kernel = C.constant(alpha / window, (1, window, 1, 1), name='W')
    smoothed = C.convolution(kernel, squared_expanded)
    # Remove the singleton axis again.
    local_sum = C.reshape(smoothed, C.InferredDimension, 0, 2)
    # (k + b) ** beta, written as exp(beta * log(k + b)).
    denominator = C.exp(beta * C.log(k + local_sum))
    return C.element_divide(operand, denominator)
示例#16
0
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     """Apply local response normalization to ``x``.

     Computes ``x / (bias + b) ** beta`` where ``b`` is the convolution of
     ``x ** 2`` with a constant kernel of value
     ``alpha / (2 * depth_radius + 1)``. The kernel dtype comes from the
     enclosing scope's ``dtype``.

     Args:
         x: CNTK tensor to normalize.
         depth_radius: half-width of the normalization window.
         bias: additive bias inside the denominator.
         alpha: scale of the window sum.
         beta: exponent of the denominator.
         name: accepted but not used by this implementation.

     Returns:
         CNTK function with the normalized tensor.
     """
     window = 2 * depth_radius + 1
     squared = C.square(x)
     # Insert a fake singleton reduction dimension after the channel axis
     # (Python and BrainScript axis orders are reversed).
     squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
     kernel = C.constant(alpha / window, shape=(1, window, 1, 1),
                         dtype=dtype, name='W')
     # The kernel is non-trivial along one axis only; the fake reduction
     # dimension has size 1, so nothing is reduced.
     smoothed = C.convolution(kernel, squared_expanded)
     # Drop the fake singleton reduction dimension again.
     local_sum = C.reshape(smoothed, C.InferredDimension, 0, 2)
     # (bias + b) ** beta, written as exp(beta * log(bias + b)).
     denominator = C.exp(beta * C.log(bias + local_sum))
     return C.element_divide(x, denominator)
示例#17
0
def true_density(z):
    """Return the unnormalised density ``exp(-u(z))`` of a sinusoidal ridge.

    ``u = 0.5 * ((z[1] - sin(2 * pi * z[0] / 4)) / 0.4) ** 2`` wherever
    ``z[0] <= 4``; elsewhere ``u`` is replaced by ``1e7`` so the density is
    effectively zero.

    Args:
        z: indexable value; only ``z[0]`` and ``z[1]`` are used.

    Returns:
        CNTK function evaluating ``exp(-u)``.
    """
    first, second = z[0], z[1]

    def ridge(t):
        return C.sin(2 * np.pi * t/4)

    energy = 0.5 * C.square((second - ridge(first))/0.4)
    huge_energy = C.ones_like(energy) * 1e7

    # Keep the energy where first <= 4 and use the huge value elsewhere,
    # i.e. in_range * energy + (1 - in_range) * huge_energy.
    in_range = C.less_equal(first, 4)
    masked_energy = C.element_select(in_range, energy, huge_energy)

    return C.exp(-masked_energy)
示例#18
0
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    """Build a local-response-normalization function on a placeholder.

    Computes ``x / (k + b) ** beta`` where ``b`` is the convolution of the
    squared input with a constant kernel of value ``alpha / (2 * n + 1)``
    and shape ``(1, 2 * n + 1, 1, 1)``.

    Args:
        k: additive bias inside the denominator.
        n: half-width of the normalization window.
        alpha: scale of the window sum.
        beta: exponent of the denominator.
        name: accepted but not used by this implementation.

    Returns:
        CNTK function with the single placeholder ``lrn_arg``.
    """
    window = 2 * n + 1
    operand = C.placeholder(name='lrn_arg')
    squared = C.square(operand)
    # Insert a fake singleton reduction dimension after the channel axis
    # (Python and BrainScript axis orders are reversed).
    squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
    kernel = C.constant(alpha / window, (1, window, 1, 1), name='W')
    # The kernel is non-trivial along one axis only; the fake reduction
    # dimension has size 1, so nothing is reduced.
    smoothed = C.convolution(kernel, squared_expanded)
    # Drop the fake singleton reduction dimension again.
    local_sum = C.reshape(smoothed, C.InferredDimension, 0, 2)
    # (k + b) ** beta, written as exp(beta * log(k + b)).
    denominator = C.exp(beta * C.log(k + local_sum))
    return C.element_divide(operand, denominator)
示例#19
0
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    """Build a local-response-normalization function on a placeholder.

    Computes ``x / (k + b) ** beta`` where ``b`` is the convolution of the
    squared input with a constant kernel of value ``alpha / (2 * n + 1)``
    and shape ``(1, 2 * n + 1, 1, 1)``.

    Args:
        k: additive bias inside the denominator.
        n: half-width of the normalization window.
        alpha: scale of the window sum.
        beta: exponent of the denominator.
        name: accepted but not used by this implementation.

    Returns:
        CNTK function with the single placeholder ``lrn_arg``.
    """
    window = 2 * n + 1
    operand = C.placeholder(name='lrn_arg')
    squared = C.square(operand)
    # Insert a fake singleton reduction dimension after the channel axis
    # (Python and BrainScript axis orders are reversed).
    squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
    kernel = C.constant(alpha / window, (1, window, 1, 1), name='W')
    # The kernel is non-trivial along one axis only; the fake reduction
    # dimension has size 1, so nothing is reduced.
    smoothed = C.convolution(kernel, squared_expanded)
    # Drop the fake singleton reduction dimension again.
    local_sum = C.reshape(smoothed, C.InferredDimension, 0, 2)
    # (k + b) ** beta, written as exp(beta * log(k + b)).
    denominator = C.exp(beta * C.log(k + local_sum))
    return C.element_divide(operand, denominator)
示例#20
0
    def var(array,W=_W,B=None,square=0,sqrt=0,V=False,sizz=0):
        """Compute a weighted deviation statistic of ``array`` under ``W``.

        ``array * W`` is centred on its mean over the last two axes, optionally
        squared, offset by ``B``, summed over axes (-2, -1, -3), optionally
        square-rooted, then reshaped and axis-swapped. ``out.shape`` is printed
        twice unconditionally.

        Args:
            array: CNTK tensor multiplied element-wise with ``W``; the final
                assert reads ``array.shape[0:3]``.
            W: weight tensor; defaults to ``_W`` from the enclosing scope,
                bound when the function is defined.
            B: optional offset; defaults to a zero constant of shape
                ``W.shape[0:2]``.
            square: if truthy, square the centred values before adding ``B``.
            sqrt: if truthy, take the square root of the summed result.
            V: debug verbosity; any truthy value prints intermediates and
                ``V == 2`` additionally prints ``i`` and ``di``.
            sizz: if 1, the mean divides by the sum of ``W`` over the last two
                axes; otherwise by the constant ``W.shape[-2] * W.shape[-1]``.

        Returns:
            CNTK function whose shape is asserted to be
            ``(array.shape[0], W.shape[0], array.shape[1], array.shape[2])``.
        """
        #W=tf.transpose(W, [0,2,3,1])
        
        arrs=array.shape
        ashp=W.shape
        sb=(W.shape[1],1,1)  # shape of the constant divisor used when sizz != 1
        WV=W.shape[-2:]  # extent of the last two axes of W
        xi=(-2,-1)  # axes reduced for the mean and for `size`
        x2=(-2,-1,-3)  # axes reduced for the final sum

        if V:
            print(W.eval())
            print(arrs,ashp)
        mul=(array*W)

        if V:
            print('Wsamp',W[-1,-1].eval())
            print('array*w',(mul.eval())[0,-1])

        size=C.reduce_sum(W,axis=xi)#shape=(outputs, channel)

        if V:
            print("sizesamp",size.shape,size.eval())
        if B is None:
            B=C.constant(0,shape=W.shape[0:2],dtype=np.float32)#channel
        # Pad B with trailing singleton axes up to the rank of W.
        B=C.reshape(B,(*B.shape,*[1 for _ in range(len(ashp)-len(B.shape))]))
        if sizz==1:
            mean=C.reduce_sum(mul,axis=xi)/size
        else:
            mean=C.reduce_sum(mul,axis=xi)/C.constant(value=WV[0]*WV[1],shape=sb,dtype=np.float32)
        if V:
            print("meansamp",mean.eval()[0,-1])
        if square:
            i=(C.square(mul-mean)+B)
        else:
            i=(((mul)-mean)+B)
        # NOTE(review): `di` is only used by the V==2 debug print below.
        di=i/size
        if V==2:
            print("i",i.eval(),"i")
            print("di",di.eval(),"di")
        if V:
            print('isamp',i.shape,i.eval()[-1,-1,])
        # NOTE(review): `i` already includes B, so B is added a second time
        # here -- confirm whether that is intended.
        out=C.reduce_sum(i+B,axis=x2)
        #out=np.rollaxis(np.sum(i+B,axis=x2),-1,1)
        print(out.shape)
        if sqrt:
            out=C.sqrt(out)
        # Keep the first four dims, then swap axes 3 and 1.
        out=C.swapaxes(C.reshape(out,out.shape[:4]), 3, 1)
        print(out.shape)
        assert out.shape==(arrs[0],ashp[0],arrs[1],arrs[2])
        return(out)
示例#21
0
    def gaussian_mdn_phi(target, mu, sigma, ndim: int):
        """
        Evaluate the gaussian density of ``target`` under each mixture component.

        For every component the result is
        ``exp(-||target - mu||^2 / (2 * sigma^2)) / ((2 * pi)^(ndim / 2) * sigma^ndim)``.

        Arguments:
            target: target tensor with shape (ndim, )
            mu: means of the gaussian mdn with shape (nmix, ndim)
            sigma: sigma of the gaussian mdn
            ndim (int): number of dimensions of the gaussian

        Returns:
            :class:`~cntk.ops.functions.Function`

        Raises:
            ValueError: if ``mu`` is not a rank-2 tensor.
        """
        if len(mu.shape) != 2:
            raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

        # Add a leading axis so the target broadcasts against every mean.
        expanded_target = C.expand_dims(target, axis=0)

        squared_distance = C.square(C.reduce_l2(expanded_target - mu, axis=-1))
        exp_term = C.exp(C.negate(squared_distance / (2 * C.square(sigma))))

        normaliser = (2 * pi) ** (ndim / 2) * C.pow(sigma, ndim)
        return C.reciprocal(normaliser) * exp_term
示例#22
0
    def _create(self, hidden):
        """Build a two-layer Q-network with an MSE loss and an SGD trainer.

        Input sizes come from the module-level ``STATE_COUNT`` and
        ``ACTION_COUNT``.

        Args:
            hidden: width of the hidden ReLU layer.

        Returns:
            tuple: ``(model, trainer, loss)``.
        """
        state_input = C.input_variable(STATE_COUNT, name="s")
        target_input = C.input_variable(ACTION_COUNT, name="q")

        # Hidden ReLU layer followed by a linear output per action.
        hidden_out = C.layers.Dense(hidden, activation=C.relu)(state_input)
        model = C.layers.Dense(ACTION_COUNT)(hidden_out)

        # Mean squared error over all axes.
        loss = C.reduce_mean(C.square(model - target_input))

        # SGD with a fixed learning rate and gradient clipping; no metric.
        schedule = C.learning_parameter_schedule(0.00025)
        sgd_learner = C.sgd(model.parameters, schedule,
                            gradient_clipping_threshold_per_sample=10)
        trainer = C.Trainer(model, (loss, None), sgd_learner)

        return model, trainer, loss
示例#23
0
def square(x, name=''):
    '''
    Return the element-wise square of `x`.

    Example:
        >>> C.eval(C.square([1., 10.]))
        [array([[ 1.      ,  100.]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network
    Returns:
        the ``output()`` of the created square node
    '''
    # Imported locally under an alias so it does not shadow this wrapper.
    from cntk import square as cntk_square
    operand = sanitize_input(x)
    node = cntk_square(operand, name)
    return node.output()
示例#24
0
def square(x, name=''):
    '''
    Return the element-wise square of `x`.

    Example:
        >>> C.eval(C.square([1., 10.]))
        [array([[ 1.      ,  100.]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network
    Returns:
        the ``output()`` of the created square node
    '''
    # Imported locally under an alias so it does not shadow this wrapper.
    from cntk import square as cntk_square
    operand = sanitize_input(x)
    node = cntk_square(operand, name)
    return node.output()
def create_binary_convolution_model():
    """Build a binarised CNN classifier together with its loss and metric.

    Relies on the module-level ``num_channels``, ``image_height``,
    ``image_width`` and ``num_classes`` and on ``BinaryConvolution``.

    Returns:
        ``C.combine([z, ce, pe])``: the scaled network output, the
        cross-entropy loss (including the binary regulariser) and the
        classification error.
    """
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # Scale the input by 1/256.
    net = C.element_times(C.constant(0.00390625), feature_var)

    # The first convolution is allowed to stay full precision.
    net = C.layers.Convolution((3, 3), 64, pad=True,
                               activation=C.relu)(net)
    net = C.layers.MaxPooling((3, 3), strides=(2, 2))(net)

    # Two stages of batch-norm -> binary 3x3 convolution -> max-pooling.
    for in_channels in (64, 128):
        net = C.layers.BatchNormalization(map_rank=1)(net)
        net = BinaryConvolution(net, (3, 3), 128, channels=in_channels,
                                pad=True)
        net = C.layers.MaxPooling((3, 3), strides=(2, 2))(net)

    # Binary 1x1 classifier head, average-pooled over axes 1 and 2.
    net = C.layers.BatchNormalization(map_rank=1)(net)
    net = BinaryConvolution(net, (1, 1), num_classes, channels=128, pad=True)
    net = C.layers.AveragePooling((net.shape[1], net.shape[2]))(net)
    net = C.reshape(net, (num_classes, ))

    # Binary regularization (ala Gang Hua): accumulate sum(1 - w^2) over
    # every parameter named "filter".
    weight_sum = C.constant(0)
    for param in net.parameters:
        if param.name != "filter":
            continue
        weight_sum = C.plus(weight_sum,
                            C.reduce_sum(C.minus(1, C.square(param))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, apply a learnable scale.
    scale = C.parameter(shape=net.shape, init=0.001)
    scaled_out = C.element_times(net, scale)

    # loss and metric
    ce = C.plus(C.cross_entropy_with_softmax(scaled_out, label_var), bin_reg)
    pe = C.classification_error(scaled_out, label_var)

    return C.combine([scaled_out, ce, pe])
示例#26
0
def std_normalized_l2_loss(output, target):
    """Mean squared error with each output dimension scaled by a fixed weight.

    The difference ``output - target`` is multiplied element-wise by 46
    hard-coded weights (named as inverse standard deviations) before being
    squared and averaged.

    Args:
        output: predicted CNTK tensor.
        target: reference CNTK tensor.

    Returns:
        CNTK function evaluating the weighted mean squared error.
    """
    inverse_std = np.array(
        [
            6.6864805402, 5.2904440280, 3.7165409939, 4.1421640454,
            8.1537399389, 7.0312877415, 2.6712380967, 2.6372177876,
            8.4253649884, 6.7482162880, 9.0849960354, 10.2624412692,
            3.1325531319, 3.1091179819, 2.7337937590, 2.7336441031,
            4.3542467871, 5.4896293687, 6.2003761588, 3.1290341469,
            5.7677042738, 11.5460919611, 9.9926451700, 5.4259818848,
            20.5060642486, 4.7692101480, 3.1681517575, 3.8582905289,
            3.4222250436, 4.6828286809, 3.0070785113, 2.8936539301,
            4.0649030157, 25.3068458731, 6.0030623160, 3.1151977458,
            7.7773542649, 6.2057372469, 9.9494258692, 4.6865422850,
            5.3300697628, 2.7722027974, 4.0658663003, 18.1101618617,
            3.5390113731, 2.7794520068,
        ],
        dtype=np.float32)
    scale = C.constant(value=inverse_std)
    error = output - target
    weighted_error = C.element_times(error, scale)
    return C.reduce_mean(C.square(weighted_error))
示例#27
0
def create_binary_convolution_model():
    """Build a binarised CNN classifier together with its loss and metric.

    Relies on the module-level ``num_channels``, ``image_height``,
    ``image_width`` and ``num_classes`` and on ``BinaryConvolution``.

    Returns:
        ``C.combine([z, ce, pe])``: the scaled network output, the
        cross-entropy loss (including the binary regulariser) and the
        classification error.
    """
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # Scale the input by 1/256.
    net = C.element_times(C.constant(0.00390625), feature_var)

    # The first convolution is allowed to stay full precision.
    net = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(net)
    net = C.layers.MaxPooling((3,3), strides=(2,2))(net)

    # Two stages of batch-norm -> binary 3x3 convolution -> max-pooling.
    for in_channels in (32, 128):
        net = C.layers.BatchNormalization(map_rank=1)(net)
        net = BinaryConvolution(net, (3,3), 128, channels=in_channels, pad=True)
        net = C.layers.MaxPooling((3,3), strides=(2,2))(net)

    # Binary 1x1 classifier head, average-pooled over axes 1 and 2.
    net = C.layers.BatchNormalization(map_rank=1)(net)
    net = BinaryConvolution(net, (1,1), num_classes, channels=128, pad=True)
    net = C.layers.AveragePooling((net.shape[1], net.shape[2]))(net)
    net = C.reshape(net, (num_classes,))

    # Binary regularization (ala Gang Hua): accumulate sum(1 - w^2) over
    # every parameter named "filter".
    weight_sum = C.constant(0)
    for param in net.parameters:
        if param.name != "filter":
            continue
        weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(param))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, apply a learnable scale.
    scale = C.parameter(shape=net.shape, init=0.001)
    scaled_out = C.element_times(net, scale)

    # loss and metric
    ce = C.plus(C.cross_entropy_with_softmax(scaled_out, label_var), bin_reg)
    pe = C.classification_error(scaled_out, label_var)

    return C.combine([scaled_out, ce, pe])
示例#28
0
def layer_normalization(inputs: C.Function,
                        name='layer_normalization') -> C.Function:
    """Normalize ``inputs`` to zero mean and unit deviation, then scale and shift.

    Mean and deviation are reduced over all static axes of the input; no
    epsilon is added to the deviation. The computation is wrapped in a block
    called ``name`` that owns learnable ``scale`` (init 1) and ``bias``
    (init 0) parameters shaped like the input.

    Args:
        inputs: CNTK function to normalize.
        name: name of the resulting block; also prefixes the placeholder name.

    Returns:
        The block applied to ``inputs``.
    """
    placeholder = C.placeholder(
        inputs.shape,
        (C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()),
        name=name + '_ph')

    mean = C.reduce_mean(placeholder, name='mu')
    mean_square_dev = C.reduce_mean(C.square(placeholder - mean))
    deviation = C.sqrt(mean_square_dev, name='sigma')

    normalized = (placeholder - mean) / deviation

    # Learnable element-wise scale and bias.
    gain = C.parameter(inputs.shape, init=1, name='scale')
    shift = C.parameter(inputs.shape, init=0, name='bias')
    normalized = normalized * gain + shift

    return C.as_block(normalized, [(placeholder, placeholder)], name)(inputs)
示例#29
0
def create_binary_convolution_model():
    """Build a binarised CNN classifier together with its loss and metric.

    Relies on the module-level ``num_channels``, ``image_height``,
    ``image_width`` and ``num_classes`` and on the layer-style
    ``BinaryConvolution`` factory.

    Returns:
        ``C.combine([z, ce, pe])``: the scaled network output, the
        cross-entropy loss (including the binary regulariser) and the
        classification error.
    """
    # Input variables for the features and the labels.
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # Scale the input by 1/256.
    net = C.element_times(C.constant(0.00390625), feature_var)

    # Full-precision first convolution.
    net = C.layers.Convolution((3, 3), 32, pad=True,
                               activation=C.relu)(net)
    net = C.layers.MaxPooling((3, 3), strides=(2, 2))(net)

    # Two stages of batch-norm -> binary 3x3 convolution -> max-pooling.
    for in_channels in (32, 128):
        net = C.layers.BatchNormalization(map_rank=1)(net)
        net = BinaryConvolution((3, 3), 128, channels=in_channels,
                                pad=True)(net)
        net = C.layers.MaxPooling((3, 3), strides=(2, 2))(net)

    # Binary 1x1 classifier head, average-pooled over axes 1 and 2.
    net = C.layers.BatchNormalization(map_rank=1)(net)
    net = BinaryConvolution((1, 1), num_classes, channels=128, pad=True)(net)
    net = C.layers.AveragePooling((net.shape[1], net.shape[2]))(net)
    net = C.reshape(net, (num_classes, ))

    # Regulariser: accumulate sum(1 - w^2) over every parameter named "filter".
    weight_sum = C.constant(0)
    for param in net.parameters:
        if param.name != "filter":
            continue
        weight_sum = C.plus(weight_sum,
                            C.reduce_sum(C.minus(1, C.square(param))))
    bin_reg = C.element_times(.000005, weight_sum)

    # Learnable scale applied to the network output.
    scale = C.parameter(shape=net.shape, init=0.001)
    scaled_out = C.element_times(net, scale)

    # Loss and metric.
    ce = C.plus(C.cross_entropy_with_softmax(scaled_out, label_var), bin_reg)
    pe = C.classification_error(scaled_out, label_var)

    return C.combine([scaled_out, ce, pe])
示例#30
0
def build_SRResNet_graph(lr_image_shape, hr_image_shape, net):
    """Wire up inputs, generator, MSE loss and Adam trainer for ``net``.

    Both image inputs are divided by 255 before use; the loss is the mean
    squared difference between the scaled high-resolution input and the
    generator output.

    Args:
        lr_image_shape: shape of the low-resolution input ``real_X``.
        hr_image_shape: shape of the high-resolution input ``real_Y``.
        net: callable mapping the scaled low-resolution input to an image.

    Returns:
        tuple: ``(real_X, real_Y, genG, real_X_scaled, real_Y_scaled,
        G_optim, G_G_trainer)``.
    """
    batch_only = [C.Axis.default_batch_axis()]
    real_X = C.input(lr_image_shape, dynamic_axes=batch_only, name="real_X")
    real_Y = C.input(hr_image_shape, dynamic_axes=batch_only, name="real_Y")

    real_X_scaled = real_X/255
    real_Y_scaled = real_Y/255

    genG = net(real_X_scaled)

    G_loss = C.reduce_mean(C.square(real_Y_scaled - genG))

    # Piecewise learning-rate schedule: 0.01, then 0.001, then 0.0001, with
    # an epoch size of 10000.
    lr_schedule = C.learning_rate_schedule(
        [(1, 0.01), (1, 0.001), (98, 0.0001)], C.UnitType.minibatch, 10000)
    G_optim = C.adam(G_loss.parameters,
                     lr=lr_schedule,
                     momentum=C.momentum_schedule(0.9),
                     gradient_clipping_threshold_per_sample=1.0)

    G_G_trainer = C.Trainer(genG, (G_loss, None), G_optim)

    return (real_X, real_Y, genG, real_X_scaled, real_Y_scaled, G_optim, G_G_trainer)
示例#31
0
    def Loss(self):
        """Build the clipped-surrogate actor-critic loss and its Adam trainer.

        Uses ``self.policy.evaluate()`` for the log-probabilities, state value
        and entropy, and creates two inputs named ``old_log_probs`` and
        ``rewards``.

        Returns:
            tuple: ``(loss, chunk, trainer)`` where ``chunk`` maps
            ``'neglog_loss'``, ``'entropy_loss'``, ``'actor_loss'`` and
            ``'critic_loss'`` to the corresponding loss terms.
        """
        # Evaluate actions and values under the current policy.
        logprobs, state_value, dist_entropy = self.policy.evaluate()

        # Importance-sampling ratio pi_theta / pi_theta_old.
        old_logprobs_in = C.input_variable(logprobs.shape, name='old_log_probs')
        ratios = C.exp(logprobs - C.stop_gradient(old_logprobs_in))

        rewards_in = C.input_variable(1, name='rewards')
        advantages = rewards_in - C.stop_gradient(state_value)

        # Surrogate loss: the smaller of the unclipped and clipped objectives.
        unclipped = ratios * advantages
        clipped_ratios = C.clip(ratios, 1 - self.eps_clip, 1 + self.eps_clip)
        clipped = clipped_ratios * advantages
        neglog_loss = -C.element_min(unclipped, clipped)
        entropy_loss = -0.01 * dist_entropy
        actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
        critic_loss = 0.5 * C.reduce_mean(C.square(state_value - rewards_in))
        loss = actor_loss + critic_loss

        chunk = dict(
            neglog_loss=neglog_loss,
            entropy_loss=entropy_loss,
            actor_loss=actor_loss,
            critic_loss=critic_loss,
        )

        # Adam with per-sample learning-rate and momentum schedules.
        lr_schedule = C.learning_parameter_schedule_per_sample(self.lr)
        momentum = C.momentum_schedule_per_sample(self.betas[0])
        variance_momentum = C.momentum_schedule_per_sample(self.betas[1])
        adam_learner = C.adam(loss.parameters, lr_schedule, momentum,
                              variance_momentum=variance_momentum)
        trainer = C.Trainer(loss, (loss, None), adam_learner)

        return loss, chunk, trainer
示例#32
0
def flow_forward(input_dim: int, act_func_pair: tuple = (None, None), batch_norm: bool = False):
    """Build one forward flow step on a fresh placeholder.

    The step is: optional batch-statistics branch, multiplication by a
    square parameter matrix initialised from a random special-orthogonal
    sample, then an affine coupling where the second half of the vector
    produces the log-scale and shift applied to the first half.

    Args:
        input_dim: Size of the placeholder the step is built on.
        act_func_pair: ``(log_s_func, t_func)`` callables applied to the
            pass-through half. Either may be ``None``, in which case a
            default three-layer dense network is created for it.
        batch_norm: When true, also build the batch-statistics branch.

    Returns:
        ``(Y, log_det_J, chunk)``: the spliced output, the accumulated
        log-determinant term, and a dict holding the pieces created here
        (``'input_dim'``, ``'W_rot_mat'``, ``'log_s_func'``, ``'t_func'``,
        ``'output'`` and, with ``batch_norm``, ``'mu'``, ``'var'``,
        ``'muB'``, ``'varB'``).
    """
    chunk = {'input_dim': input_dim}
    log_det_J = 0

    x = C.placeholder(input_dim, name='place_holder')

    if batch_norm:
        # Constant statistics (zeros / ones) exposed to the caller.
        chunk['mu'] = C.Constant(np.zeros(shape=input_dim))
        chunk['var'] = C.Constant(np.ones(shape=input_dim))

        eps = C.Constant(1e-7)
        batch_axis = C.Axis.default_batch_axis()
        batch_mean = C.reduce_mean(x, axis=batch_axis)
        batch_var = C.reduce_mean(C.square(x - batch_mean), axis=batch_axis)

        chunk['muB'] = batch_mean
        chunk['varB'] = batch_var

        # NOTE(review): the transform scales by the constant chunk['var'],
        # while the log-det term below uses the batch variance; the usual
        # normalising form would be (x - mu) / sqrt(var + eps). Confirm this
        # asymmetry is intended.
        x = C.sqrt(chunk['var'] + eps) * x + chunk['mu']

        log_det_J += -0.5 * C.reduce_sum(C.log((batch_var + eps)))

    # Square mixing matrix, initialised with a random special-orthogonal
    # sample. NOTE(review): C.det is presumably provided by an extension of
    # the imported module; confirm it exists.
    rotation = C.parameter((input_dim, input_dim))
    chunk['W_rot_mat'] = rotation
    rotation.value = special_ortho_group.rvs(input_dim)
    rotated = x @ rotation
    log_det_J += C.log(C.abs(C.det(rotation)))

    # Split for the coupling: x1 is transformed, x2 passes through.
    half = input_dim // 2
    x1 = rotated[:half]
    x2 = rotated[half:]

    log_s_func, t_func = act_func_pair
    if log_s_func is None:
        # Default log-scale network; tanh on the last layer bounds its output.
        log_s_func = C.layers.Sequential([
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(half, C.tanh),
        ])
    if t_func is None:
        # Default shift network; last layer uses Dense's default activation.
        t_func = C.layers.Sequential([
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(half),
        ])

    chunk['log_s_func'] = log_s_func
    chunk['t_func'] = t_func

    log_s = log_s_func(x2)
    shift = t_func(x2)

    y1 = C.exp(log_s) * x1 + shift

    output = C.splice(y1, x2)
    chunk['output'] = output

    log_det_J += C.reduce_sum(log_s)

    return output, log_det_J, chunk
示例#33
0
# Pick the MNIST data directory: the relative Examples/Image/DataSets tree
# when it exists, otherwise a local data/MNIST folder.
data_dir = os.path.join("..", "Examples", "Image", "DataSets", "MNIST")
if not os.path.exists(data_dir):
    data_dir = os.path.join("data", "MNIST")

# Write the train and test sets into data_dir. `savetxt`, `train` and `test`
# are defined outside this excerpt.
print('Writing train text file...')
savetxt(os.path.join(data_dir, "Train-28x28_cntk_text.txt"), train)

print('Writing test text file...')
savetxt(os.path.join(data_dir, "Test-28x28_cntk_text.txt"), test)

print('Done')

# Network inputs. NOTE: `input` shadows the Python builtin of the same name.
input = C.input_variable(input_dim)
label = C.input_variable(num_output_classes)
# Three views of the input, each built from input / 255.0
# (presumably 8-bit pixel values scaled to [0, 1] -- confirm against the data).
normalize_input = input / 255.0
squared_input = C.square(input / 255.0)
sqrt_input = C.sqrt(input / 255.0)

# The model is fed the three views spliced together into one vector.
z = create_model(C.splice(normalize_input, squared_input, sqrt_input))

# Training criterion: softmax cross-entropy of the model output vs. the label.
loss = C.cross_entropy_with_softmax(z, label)

# Evaluation metric reported alongside the loss.
label_error = C.classification_error(z, label)

lr_schedule = C.learning_parameter_schedule(learning_rate)

# Plain SGD over all parameters of the model.
learner = C.sgd(z.parameters, lr_schedule)

trainer = C.Trainer(z, (loss, label_error), [learner])

# Flag initialised to False here; it is not updated within this excerpt.
data_found = False
示例#34
0
def loss_fun(output, label):
    """Return the per-sequence mean, over steps, of the summed squared error.

    Args:
        output: Predicted sequence.
        label: Target sequence, combined element-wise with ``output``.

    Returns:
        The sequence-summed squared error divided by the step count.
    """
    # A value of 1 for every step (the reduced output times zero, plus one);
    # summing it along the sequence yields the number of steps.
    one_per_step = C.reduce_sum(output) * 0 + 1
    step_count = C.sequence.reduce_sum(one_per_step)

    squared_error_per_step = C.reduce_sum(C.square(output - label))
    return C.sequence.reduce_sum(squared_error_per_step) / step_count
示例#35
0
def true_density(z):
    """Return ``exp(-u)`` for a sinusoidal energy ``u`` over the first two components of ``z``.

    The energy is ``0.5 * ((z2 - sin(2*pi*z1/4)) / 0.4) ** 2`` where
    ``z1 <= 4``. Elsewhere it is replaced by ``1e7``, which drives the
    returned value to effectively zero.

    Args:
        z: Value indexable as ``z[0]`` and ``z[1]``.
    """
    z1, z2 = z[0], z[1]

    ridge = C.sin(2 * np.pi * z1/4)
    energy = 0.5 * C.square((z2 - ridge)/0.4)

    # Large constant energy used wherever the z1 <= 4 condition fails.
    out_of_range_energy = C.ones_like(energy) * 1e7
    in_range = C.less_equal(z1,4)
    energy = C.element_select(in_range, energy, out_of_range_energy)

    return C.exp(-energy)

#%%
# tanh non-linearity and its derivative, 1 - tanh(x)^2; h_prime is used by
# the loop that follows.
h = lambda x: C.tanh(x)
h_prime = lambda x: 1 - C.square(C.tanh(x))

# Base distribution built with loc [0, 0] and scale_diag [1, 1]
# (MultivariateNormalDiag is defined outside this excerpt).
base_dist = MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.])
# Graph input named 'sampled', sized by base_dist.size().
z_0 = C.input_variable(base_dist.size(), name='sampled')
# Running value and running log-det accumulator, both seeded here for the
# loop that follows.
z_prev = z_0
sum_log_det_jacob = 0.

# Uniform initializer with scale 1, shared by the parameters created in the loop.
initializer = C.initializer.uniform(1)
for i in range(K):
    u = C.parameter((2), name='u', init=initializer)
    w = C.parameter((2), name='w', init=initializer)
    b = C.parameter((1), name='b', init=initializer)

    psi = h_prime(C.dot(w, z_prev)+b) * w
    det_jacob = C.abs(1 + C.dot(u, psi))