示例#1
0
文件: train.py 项目: gzt200361/CNTK
def calculate_loss_vector(network, path, location_path, communicator):
    """Accumulate negated log-softmax outputs per label word over one sweep.

    A ``DataSource`` is opened on ``path`` and read minibatch by minibatch
    until the minibatch reports ``sweep_end``. For every position of every
    sequence, the log-softmax vector of model output 0 is subtracted from the
    row of ``row_loss_vector`` selected by the ``word1`` label, and the
    log-softmax vector of model output 1 is subtracted from the row of
    ``col_loss_vector`` selected by the ``word2`` label.

    Arguments:
        network: mapping holding the ``'model'`` function and its ``'row'``
            and ``'col'`` input variables.
        path: data path handed to ``DataSource``.
        location_path: location file path handed to ``DataSource``.
        communicator: distributed communicator; only its ``rank()`` is used.

    Returns:
        tuple: ``(col_loss_vector, row_loss_vector)`` -- the column
        accumulator comes first.
    """
    source = DataSource(path, opt.vocab_file, location_path,
                        opt.seqlength, opt.batchsize)

    # Author's note: output 0 maps the current row to the current column,
    # output 1 maps the current column to the next row.
    log_probs = C.combine([
        C.log(C.softmax(network['model'].outputs[0])),
        C.log(C.softmax(network['model'].outputs[1])),
    ])

    row_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt))
    col_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt))

    while True:
        mb = source.next_minibatch(
            opt.seqlength * opt.batchsize * Communicator.num_workers(),
            Communicator.num_workers(),
            communicator.rank())
        outputs = log_probs.eval({
            network['row']: mb[source.input1],
            network['col']: mb[source.input2],
        })
        row_scores = outputs[log_probs.outputs[0]]
        col_scores = outputs[log_probs.outputs[1]]
        row_labels = mb[source.word1].asarray()
        col_labels = mb[source.word2].asarray()

        for seq in range(len(row_labels)):
            # The evaluated sequence length drives the inner loop.
            for step in range(len(row_scores[seq])):
                row_word = int(row_labels[seq][step][0])
                col_word = int(col_labels[seq][step][0])
                row_loss_vector[row_word] -= row_scores[seq][step]
                col_loss_vector[col_word] -= col_scores[seq][step]

        # Stop once the minibatch that closes the sweep has been consumed.
        if mb[source.input1].sweep_end:
            break

    return col_loss_vector, row_loss_vector
示例#2
0
def flow_reverse(chunk):
    """Build the reverse-direction graph for one flow chunk.

    Reads ``'input_dim'``, ``'log_s_func'``, ``'t_func'`` and ``'W_rot_mat'``
    from ``chunk`` and, as a side effect, stores the inverse of the rotation
    matrix under ``chunk['W_rot_mat_inv']`` as a constant named ``'inv_W'``.

    Arguments:
        chunk (dict): description of the flow step (see keys above).

    Returns:
        tuple: ``(out, log_det_J)`` where ``out`` is the graph built on a
        placeholder named ``'place_holder'`` and ``log_det_J`` is the
        accumulated log-determinant expression.
    """
    input_dim = chunk['input_dim']
    split_at = input_dim // 2

    y = C.placeholder(input_dim, name='place_holder')
    log_s_func = chunk['log_s_func']
    t_func = chunk['t_func']

    # The second half passes through unchanged and conditions the first half.
    y_head = y[:split_at]
    y_tail = y[split_at:]
    log_scale = log_s_func(y_tail)
    shift = t_func(y_tail)
    scale = C.exp(log_scale)
    x_head = (y_head - shift) / scale
    x_tail = y_tail
    x = C.splice(x_head, x_tail)

    log_det_J = 0 + C.reduce_sum(C.log(C.abs(scale)))

    rotation = chunk['W_rot_mat']
    inv_rotation = C.Constant(np.linalg.inv(rotation.value), name='inv_W')
    chunk['W_rot_mat_inv'] = inv_rotation
    out = x @ inv_rotation
    log_det_J += input_dim * C.log(C.det(inv_rotation))

    # Disabled in the original implementation:
    # if 'scale' in chunk:
    #     _out -= chunk['bias']
    #     _out /= chunk['scale']
    #     log_det_J += input_dim*C.reduce_sum(C.log(C.abs(chunk['scale'])))

    # _out -= chunk['b']
    # _out @= _inv_w

    return out, log_det_J
示例#3
0
文件: train.py 项目: zwlshine/CNTK
def calculate_loss_vector(network, path, location_path, communicator):
    """Sweep the data once and sum negated log-softmax vectors per label word.

    Minibatches are pulled from a ``DataSource`` built on ``path`` until one
    reports ``sweep_end``. At each sequence position the log-softmax of model
    output 0 is subtracted from ``row_loss_vector[word1 label]`` and the
    log-softmax of model output 1 from ``col_loss_vector[word2 label]``.

    Arguments:
        network: mapping with the ``'model'`` function and the ``'row'`` /
            ``'col'`` input variables.
        path: data path passed to ``DataSource``.
        location_path: location file path passed to ``DataSource``.
        communicator: distributed communicator; only ``rank()`` is called.

    Returns:
        tuple: ``(col_loss_vector, row_loss_vector)``, column accumulator
        first.
    """
    source = DataSource(path, opt.vocab_file, location_path,
                        opt.seqlength, opt.batchsize)

    # Author's note: the current row predicts the current column (output 0)
    # and the current column predicts the next row (output 1).
    row_log_softmax = C.log(C.softmax(network['model'].outputs[0]))
    col_log_softmax = C.log(C.softmax(network['model'].outputs[1]))
    scorer = C.combine([row_log_softmax, col_log_softmax])

    accumulator_shape = (opt.vocabsize, vocab_sqrt)
    row_loss_vector = np.zeros(accumulator_shape)
    col_loss_vector = np.zeros(accumulator_shape)

    while True:
        mb = source.next_minibatch(
            opt.seqlength * opt.batchsize * Communicator.num_workers(),
            Communicator.num_workers(),
            communicator.rank())
        evaluated = scorer.eval({
            network['row']: mb[source.input1],
            network['col']: mb[source.input2],
        })
        row_scores = evaluated[scorer.outputs[0]]
        col_scores = evaluated[scorer.outputs[1]]
        word1 = mb[source.word1].asarray()
        word2 = mb[source.word2].asarray()

        for seq_idx in range(len(word1)):
            for pos in range(len(row_scores[seq_idx])):
                row_word = int(word1[seq_idx][pos][0])
                col_word = int(word2[seq_idx][pos][0])
                row_loss_vector[row_word] -= row_scores[seq_idx][pos]
                col_loss_vector[col_word] -= col_scores[seq_idx][pos]

        # The minibatch that ends the sweep is still processed before exiting.
        if mb[source.input1].sweep_end:
            break

    return col_loss_vector, row_loss_vector
def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    """Assemble generator/discriminator graphs and their distributed trainers.

    Arguments:
        noise_shape: shape of the generator's noise input.
        image_shape: shape of the real-image input (scaled from [0, 255] to
            [-1, 1] before it reaches the discriminator).
        G_progress_printer: progress writer given to the generator trainer.
        D_progress_printer: progress writer given to the discriminator trainer.

    Returns:
        tuple: ``(X_real, X_fake, Z, G_trainer, D_trainer)``.
    """
    batch_only_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=batch_only_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=batch_only_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Generator output, discriminator on real data, and a weight-sharing clone
    # of the discriminator that is fed the generator output instead.
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method='share',
        substitutions={X_real_scaled.output: X_fake.output},
    )

    # Loss functions.
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    def _make_learner(parameters):
        # Both networks use the same optimizer configuration.
        return C.fsadagrad(
            parameters=parameters,
            lr=C.learning_parameter_schedule_per_sample(lr),
            momentum=C.momentum_schedule_per_sample(0.9985724484938566),
        )

    G_learner = _make_learner(X_fake.parameters)
    D_learner = _make_learner(D_real.parameters)

    DistG_learner = C.train.distributed.data_parallel_distributed_learner(G_learner)

    # The following API marks a learner as the metric aggregator, which is
    # used by the trainer to determine the training progress. It is required
    # only when more than one learner is provided to a *single* trainer.
    # Here two trainers are used, each with a single learner, so it is not
    # required and is set automatically by CNTK for each learner. If both
    # learners are ever used with a single trainer, it needs to be called
    # before creating the trainer.
    #DistG_learner.set_as_metric_aggregator()

    DistD_learner = C.train.distributed.data_parallel_distributed_learner(D_learner)

    # One trainer per network; neither defines an evaluation metric.
    G_trainer = C.Trainer(X_fake, (G_loss, None), DistG_learner, G_progress_printer)
    D_trainer = C.Trainer(D_real, (D_loss, None), DistD_learner, D_progress_printer)

    return X_real, X_fake, Z, G_trainer, D_trainer
示例#5
0
def cost_func(training_mode, prediction, target):
    '''
    Build the training loss for the given training mode.

    'majority', 'probability' and 'crossentropy' use cross entropy between
    target and prediction. 'multi_target' instead takes the negative log of
    the largest target-weighted prediction, so that multiple labels are
    treated exactly the same. Any other mode yields None.
    '''
    if training_mode in ('majority', 'probability', 'crossentropy'):
        # Cross Entropy.
        return ct.negate(ct.reduce_sum(ct.element_times(target, ct.log(prediction)), axis=-1))

    if training_mode == 'multi_target':
        return ct.negate(ct.log(ct.reduce_max(ct.element_times(target, prediction), axis=-1)))

    return None
示例#6
0
def true_density(z):
    """Return ``exp(-u(z))`` for a two-component input ``z``.

    ``u`` combines a term based on the distance of the norm of ``z`` from 4
    with the negative log of the sum of two exponentials of the first
    component, centred at +2 and -2.
    """
    z1 = z[0]
    z2 = z[1]
    radius = C.sqrt(C.square(z1) + C.square(z2))
    bump_at_plus_two = C.exp(-0.5 * C.square((z1 - 2) / 0.8))
    bump_at_minus_two = C.exp(-0.5 * C.square((z1 + 2) / 0.8))
    ring_term = 0.5 * C.square(((radius - 4) / 0.4))
    energy = ring_term - C.log(bump_at_plus_two + bump_at_minus_two)
    return C.exp(-energy)
示例#7
0
def cross_entropy_with_full_softmax(
    output,  # Node providing the output of the lstm layers
    target_vector,  # Node providing the expected labels
    sv_dim,
    vocab_dim
    ):
    """Build the cross-entropy criterion plus penalties on the ``sv`` output.

    Uses ``output.outputs[0]`` as the prediction, ``output.outputs[3]`` as the
    ``sv`` sequence and ``output.outputs[4]`` as the value the first ``sv``
    step is compared against. ``sv_dim`` and ``vocab_dim`` are accepted but
    not read here.

    Returns:
        tuple: ``(ce, error)`` -- the summed loss including the penalty terms,
        and a node flagging where the target's score is below the maximum.
    """
    sv_sequence = output.outputs[3]
    prediction = output.outputs[0]
    target_score = C.times_transpose(prediction, target_vector)

    # cross entropy loss with softmax function
    ce = - C.log(target_score)

    # the error
    best_score = C.reduce_max(prediction)
    error = C.less(target_score, best_score)

    ce = sequence.reduce_sum(ce)

    # discourages the network from turning more than one gate off in a single time step.
    step_change = C.abs(C.sequence.slice(sv_sequence, 1, 0) - C.sequence.slice(sv_sequence, 0, -1))
    penalty = sequence.reduce_sum(0.0001 * C.pow(100.0, step_change))
    #ce += sumc

    # penalise generated utterances that failed to render all the required slots
    penalty += C.abs(C.sequence.last(sv_sequence))
    penalty += C.abs(C.sequence.first(sv_sequence) - output.outputs[4])
    penalty = C.reduce_sum(penalty)

    ce = C.reduce_sum(ce)
    ce += penalty
    return ce, error
示例#8
0
def cross_entropy_with_sampled_softmax(
    hidden_vector,  # Node providing the output of the recurrent layers
    target_vector,  # Node providing the expected labels (as sparse vectors)
    vocab_dim,  # Vocabulary size
    hidden_dim,  # Dimension of the hidden vector
    num_samples,  # Number of samples to use for sampled softmax
    sampling_weights,  # Node providing weights to be used for the weighted sampling
    allow_duplicates=False  # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
):
    """Create a sampled-softmax training criterion with its own output layer.

    A bias and a weight parameter are created here. The module-level flag
    ``use_sparse`` decides whether the sample selector stays sparse. The
    shapes of ``hidden_vector`` and of the sampled weights are printed while
    the graph is built.

    Returns:
        tuple: ``(z, cross_entropy_on_samples, error_on_samples)`` where ``z``
        is the input to a full softmax, reshaped to ``vocab_dim``.
    """
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim),
                                 init=C.initializer.glorot_uniform())

    # sparse matrix [num_samples * vocab_size]
    sparse_selector = C.random_sample(sampling_weights, num_samples, allow_duplicates)
    if use_sparse:
        sample_selector = sparse_selector
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the identity matrix below might be quite large.
        # If a dense representation is wanted for all data, the sample selector has to be converted.
        identity = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sparse_selector, identity)

    # dense row [1 * vocab_size]
    inclusion_probs = C.random_sample_inclusion_frequency(
        sampling_weights, num_samples, allow_duplicates)
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    sampled_weights = C.times(sample_selector, weights, name='wS')  # [num_samples * hidden_dim]
    print("ws:" + str(sampled_weights.shape))

    # Logits of the sampled classes, corrected by the log prior. [num_samples]
    sampled_logits = (
        C.times_transpose(sampled_weights, hidden_vector, name='zS1')
        + C.times(sample_selector, bias, name='zS2')
        - C.times_transpose(sample_selector, log_prior, name='zS3')
    )

    # Weight vector and logit of the true label.
    target_weights = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    target_logit = (
        C.times_transpose(target_weights, hidden_vector, name='zT1')
        + C.times(target_vector, bias, name='zT2')
        - C.times_transpose(target_vector, log_prior, name='zT3')
    )  # [1]

    sampled_log_sum = C.reduce_log_sum_exp(sampled_logits)

    # Compute the cross entropy that is used for training.
    # There is no check whether a sampled class coincides with the true label,
    # so the true class may be counted twice in the normalizing denominator
    # of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(target_logit, sampled_log_sum) - target_logit

    # For applying the model, also output a node providing the input for the full softmax.
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=vocab_dim)

    sampled_max = C.reduce_max(sampled_logits)
    error_on_samples = C.less(target_logit, sampled_max)
    return (z, cross_entropy_on_samples, error_on_samples)
示例#9
0
def create_network(feature_dim=40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                   feature_norm_files=None, label_prior_file=None, context=(0, 0), model_type=None):
    """Build a DNN or BLSTM classifier with its loss and metric nodes.

    Arguments:
        feature_dim (int): dimension of a single feature frame.
        num_classes (int): number of output classes.
        feature_mean_file: file read by ``load_ascii_vector`` for the feature mean.
        feature_inv_stddev_file: file read by ``load_ascii_vector`` for the
            inverse standard deviation.
        feature_norm_files: accepted but not used in this function.
        label_prior_file: file read by ``load_ascii_vector`` for the label prior.
        context (tuple): two context sizes; the input holds
            ``1 + context[0] + context[1]`` frames.
        model_type (str): ``"DNN"`` or ``"BLSTM"``.

    Returns:
        dict: nodes keyed by ``'feature'``, ``'label'``, ``'output'``,
        ``'ScaledLogLikelihood'``, ``'ce'``, ``'pe'`` and ``'final_hidden'``.

    Raises:
        RuntimeError: if ``model_type`` is neither ``"DNN"`` nor ``"BLSTM"``.
    """
    frames_per_input = 1 + context[0] + context[1]

    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        # Mean / inverse-stddev are loaded once and shared by the closure.
        mean = C.reshape(load_ascii_vector(feature_mean_file, 'feature_mean'),
                         shape=(1, feature_dim))
        inv_stddev = C.reshape(load_ascii_vector(feature_inv_stddev_file, 'feature_invstddev'),
                               shape=(1, feature_dim))

        def _func(operand):
            # Normalise frame by frame, then restore the operand's shape.
            frames = C.reshape(operand, shape=(frames_per_input, feature_dim))
            return C.reshape(C.element_times(frames - mean, inv_stddev), shape=operand.shape)
        return _func

    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers),
                         lambda: C.layers.Dense(hidden_size, activation=C.sigmoid))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0),
                        name='rnn_parameters')

        def _func(operand):
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                        num_layers=num_layers, bidirectional=True,
                                        recurrent_op='lstm')
        return _func

    # Input variables denoting the features and label data
    feature_var = C.sequence.input_variable(feature_dim * frames_per_input)
    label_var = C.sequence.input_variable(num_classes)

    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)

    if model_type == "DNN":
        net = MyDNNLayer(512, 4)(feature_norm)
    elif model_type == "BLSTM":
        net = MyBLSTMLayer(512, 2)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)
    pe = C.classification_error(out, label_var)
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)
    print()

    network = {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        # last hidden layer output, kept for future use in the CTC tutorial
        'final_hidden': net,
    }
    return network
示例#10
0
def gaussian_mdn_loss(output_vector, target_vector, nmix: int, ndim: int):
    """
    Negative log-likelihood loss for a gaussian mixture density network.

    Typically used for regression. Mixture density networks can represent
    arbitrary conditional probabilities in the same way a conventional neural
    network can represent arbitrary functions.

    Example:
        ndim, nmix = 1, 3
        input_tensor = C.input_variable(1, name="input_tensor")
        target_tensor = C.input_variable(1, name="target_tensor")

        # model
        inner = Dense(50, activation=C.relu)(input_tensor)
        inner = Dense(50, activation=C.relu)(inner)
        prediction_tensor = Dense((ndim + 2) * nmix, activation=None)(inner)

        loss = gaussian_mdn_loss(prediction_tensor, target_tensor, nmix=nmix, ndim=ndim)

    Arguments:
        output_vector: network output
        target_vector: ground truths (typically a continuous variable)
        nmix (int): number of mixtures
        ndim (int): number of dimensions in a gaussian kernel

    Returns:
        :class:`~cntk.ops.functions.Function`
    """

    @C.typemap
    def gaussian_mdn_phi(target, mu, sigma, ndim: int):
        """
        Evaluate every mixture component's gaussian kernel at the target.

        A single sigma is used per component across all target dimensions.

        Arguments:
            target: target tensor with shape (ndim, )
            mu: means of gaussian mdn with shape (nmix, ndim)
            sigma: sigma of gaussian mdn
            ndim (int): number of dimensions in gaussian

        Returns:
            :class:`~cntk.ops.functions.Function`

        Raises:
            ValueError: if ``mu`` is not rank 2.
        """
        if len(mu.shape) != 2:
            raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

        # Add a leading axis so the target broadcasts against every mean.
        t = C.expand_dims(target, axis=0)

        squared_distance = C.square(C.reduce_l2(t - mu, axis=-1))
        exp_term = C.exp(C.negate(squared_distance / (2 * C.square(sigma))))
        normaliser = (2 * pi) ** (ndim / 2) * C.pow(sigma, ndim)
        return C.reciprocal(normaliser) * exp_term

    alpha, mu, sigma = gaussian_mdn_coeff(output_vector, nmix=nmix, ndim=ndim)
    phi = gaussian_mdn_phi(target_vector, mu, sigma, ndim=ndim)

    # Clip the mixture likelihood before the log to keep it finite.
    likelihood = C.clip(C.reduce_sum(alpha * phi, axis=0), 1e-10, 1e10)
    return C.negate(C.log(likelihood))
示例#11
0
def test_grad_custimized_root():
    """Check ``grad`` with an explicit ``grad_root`` on a combined function.

    ``sqrt(x)`` and ``log(x)`` are combined, but the gradient is requested
    with respect to the ``sqrt`` output only, so the expected values are
    ``1 / (2 * sqrt(x))`` for x in {1, 4, 16}.
    """
    # C.input is a deprecated alias; input_variable is the supported spelling.
    x = C.input_variable(shape=(1, ), needs_gradient=True)
    y = C.sqrt(x)
    y2 = C.log(x)
    combine = C.combine([y.output, y2.output])
    a = np.asarray([1, 4, 16], dtype=np.float32).reshape(3, 1)
    grads = combine.grad({x: a}, grad_root=y.output)
    expect_grad = np.asarray([[0.5], [0.25], [0.125]], dtype=np.float32)
    assert np.array_equal(grads, expect_grad)
示例#12
0
def test_grad_custimized_root():
    """Gradient of a combined function taken from a chosen root output.

    Only the ``sqrt`` branch is used as ``grad_root``, so the expected
    gradient is ``1 / (2 * sqrt(x))`` at x = 1, 4 and 16.
    """
    x = C.input_variable(shape=(1,), needs_gradient=True)
    sqrt_branch = C.sqrt(x)
    log_branch = C.log(x)
    both = C.combine([sqrt_branch.output, log_branch.output])

    samples = np.asarray([1, 4, 16], dtype=np.float32).reshape(3, 1)
    expected = np.asarray([[0.5], [0.25], [0.125]], dtype=np.float32)

    actual = both.grad({x: samples}, grad_root=sqrt_branch.output)
    assert np.array_equal(actual, expected)
示例#13
0
 def __local_response_normalization(self, k, n, alpha, beta, name=''):
     """Build a local response normalization graph on a placeholder.

     Divides the input by ``(k + b) ** beta`` (computed as
     ``exp(beta * log(k + b))``), where ``b`` is a convolution of the squared
     input with a constant filter of value ``alpha / (2 * n + 1)`` spanning
     ``2 * n + 1`` entries. ``name`` is accepted but not used.
     """
     arg = cntk.placeholder(name='lrn_arg')
     squared = cntk.square(arg)
     # Insert a leading singleton axis before convolving.
     squared_expanded = cntk.reshape(squared, (1, cntk.InferredDimension), 0, 1)
     window = 2 * n + 1
     W = cntk.constant(alpha / window, (1, window, 1, 1), name='W')
     summed = cntk.convolution(W, squared_expanded)
     # Drop the singleton axis again.
     b = cntk.reshape(summed, cntk.InferredDimension, 0, 2)
     denominator = cntk.exp(beta * cntk.log(k + b))
     return cntk.element_divide(arg, denominator)
示例#14
0
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     """Apply local response normalization to ``x``.

     Returns ``x / (bias + b) ** beta`` (computed as
     ``exp(beta * log(bias + b))``), where ``b`` is a convolution of ``x**2``
     with a constant filter of value ``alpha / (2 * depth_radius + 1)`` over
     ``2 * depth_radius + 1`` entries. ``name`` is accepted but not used; the
     filter's ``dtype`` comes from the enclosing scope.
     """
     window = 2 * depth_radius + 1
     squared = C.square(x)
     # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
     # Note that Python axis order and BrainScript axis order are reversed.
     squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
     W = C.constant(alpha / window, shape=(1, window, 1, 1), dtype=dtype, name='W')
     # 3D convolution with a filter that has a non 1-size only in the 3rd axis; it does not
     # reduce since the reduction dimension is fake and 1
     summed = C.convolution(W, squared_expanded)
     # reshape back to remove the fake singleton reduction dimension
     b = C.reshape(summed, C.InferredDimension, 0, 2)
     denominator = C.exp(beta * C.log(bias + b))
     return C.element_divide(x, denominator)
示例#15
0
文件: NIN_test2.py 项目: lizishu/CNTK
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    """Return a local response normalization function built on a placeholder.

    The placeholder input is divided by ``exp(beta * log(k + b))``, where
    ``b`` is a convolution of the squared input with a constant filter of
    value ``alpha / (2 * n + 1)`` covering ``2 * n + 1`` entries. ``name`` is
    accepted but not used.
    """
    window = 2 * n + 1
    arg = C.placeholder(name='lrn_arg')
    squared = C.square(arg)
    # Insert a leading singleton axis before convolving.
    squared_expanded = C.reshape(squared, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / window, (1, window, 1, 1), name='W')
    summed = C.convolution(W, squared_expanded)
    # Drop the singleton axis again.
    b = C.reshape(summed, C.InferredDimension, 0, 2)
    denominator = C.exp(beta * C.log(k + b))

    return C.element_divide(arg, denominator)
示例#16
0
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     """Local response normalization of ``x``.

     Computes ``x / exp(beta * log(bias + b))``, with ``b`` obtained by
     convolving ``x**2`` with a constant filter whose entries are
     ``alpha / (2 * depth_radius + 1)``. ``name`` is accepted but not used;
     ``dtype`` is taken from the enclosing scope.
     """
     filter_width = 2 * depth_radius + 1
     x_squared = C.square(x)
     # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
     # Note that Python axis order and BrainScript axis order are reversed.
     x_squared_5d = C.reshape(x_squared, (1, C.InferredDimension), 0, 1)
     W = C.constant(alpha / filter_width, shape=(1, filter_width, 1, 1), dtype=dtype, name='W')
     # 3D convolution with a filter that has a non 1-size only in the 3rd axis; it does not
     # reduce since the reduction dimension is fake and 1
     neighbourhood = C.convolution(W, x_squared_5d)
     # reshape back to remove the fake singleton reduction dimension
     b = C.reshape(neighbourhood, C.InferredDimension, 0, 2)
     scale = C.exp(beta * C.log(bias + b))
     return C.element_divide(x, scale)
示例#17
0
    def build_trainer(self):
        """Create the value and policy trainers and their TensorBoard writers.

        Sets ``self.tensorboard_v_writer``, ``self.tensorboard_pi_writer``,
        ``self.trainer_v`` and ``self.trainer_pi``. Both trainers use Adam
        with the same hyper-parameters; they differ only in model, loss,
        parameter set and progress writer.
        """
        # Learning rate and momentum parameters for the Adam optimizer.
        lr = learning_rate_schedule(self.lr, UnitType.minibatch)
        beta1 = momentum_schedule(0.9)
        beta2 = momentum_schedule(0.99)

        def make_adam(parameters):
            # Shared optimizer configuration for both networks.
            return adam(parameters,
                        lr,
                        beta1,
                        variance_momentum=beta2,
                        gradient_clipping_threshold_per_sample=2,
                        l2_regularization_weight=0.01)

        # Value loss: squared error between R and the value output.
        loss_on_v = cntk.squared_error(self.R, self.v)

        # Policy loss: -(log pi(a|s) * (R - v_calc) + 0.01 * <pi, log pi>).
        pi_a_s = cntk.log(cntk.times_transpose(self.pi, self.action))
        loss_on_pi = cntk.variables.Constant(-1) * (cntk.plus(
            cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc)),
            0.01 * cntk.times_transpose(self.pi, cntk.log(self.pi))))
        #loss_on_pi = cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc))

        self.tensorboard_v_writer = TensorBoardProgressWriter(
            freq=10, log_dir="tensorboard_v_logs", model=self.v)
        self.tensorboard_pi_writer = TensorBoardProgressWriter(
            freq=10, log_dir="tensorboard_pi_logs", model=self.pi)

        # tensorboard --logdir=tensorboard_pi_logs  http://localhost:6006/
        # tensorboard --logdir=tensorboard_v_logs  http://localhost:6006/

        # Create the trainers.
        self.trainer_v = cntk.Trainer(
            self.v, loss_on_v, [make_adam(self.pms_v)], self.tensorboard_v_writer)
        self.trainer_pi = cntk.Trainer(
            self.pi, loss_on_pi, [make_adam(self.pms_pi)], self.tensorboard_pi_writer)
示例#18
0
def criteria(label, output, block_size, c_classes, weights):
    '''
    Define the loss function and metric.

    The loss is the mean of the class-weighted cross entropy over a softmax
    taken along axis 0. The metric is the classification error along axis 0
    minus the fraction of the label mass found in slice 0 of axis 0.
    ``block_size`` and ``c_classes`` are accepted but not used. ``label`` must
    have exactly three dimensions.

    Returns a ``(mean_ce, pe)`` tuple.
    '''
    probs = cntk.softmax(output, axis=0)
    per_class_ce = -cntk.element_times(cntk.log(probs), label)
    weighted_ce = cntk.times(weights, per_class_ce, output_rank=2)
    mean_ce = cntk.reduce_mean(weighted_ce)

    # The unpacked values are unused; the unpack enforces a rank-3 label.
    _, w, h = label.shape

    error_rate = cntk.classification_error(probs, label, axis=0)
    first_slice_share = cntk.reduce_sum(cntk.slice(label, 0, 0, 1)) / cntk.reduce_sum(label)
    pe = error_rate - first_slice_share
    return (mean_ce, pe)
示例#19
0
    def __init__(self, p, eps=1e-7):
        """Set up probability, logit, cumulative and entropy nodes from ``p``.

        Arguments:
            p: unnormalised weights, either a CNTK variable/function or
                array-like data; singleton axes are squeezed away.
            eps (float): constant added to the normaliser to avoid division
                by zero.
        """
        is_graph_node = isinstance(p, (C.Variable, C.Function))
        self.p = C.squeeze(p) if is_graph_node else C.Constant(np.squeeze(p))

        self.eps = C.Constant(eps, name='eps')
        self.c = self.p.shape[0]

        self.prob = self.p / (self.eps + C.reduce_sum(self.p))
        self.logits = C.log(self.prob)

        # Multiplying by an upper-triangular matrix of ones (diagonal
        # included) yields the running sum of the probabilities.
        upper_ones = 1 - np.tri(self.prob.shape[-1], k=-1)
        self.accum_prob = self.prob @ C.Constant(upper_ones)

        self._entropy = -C.reduce_sum(self.logits * self.prob)

        dist = C.input_variable(1, name='category index')
        # method 1
        selected_prob = C.reduce_sum(self.prob * C.one_hot(dist, self.c))
        self._log_prob = C.log(selected_prob)
示例#20
0
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    """Return a local response normalization function built on a placeholder.

    The placeholder input is divided by ``exp(beta * log(k + b))``, where
    ``b`` is the convolution of the squared input with a constant filter of
    value ``alpha / (2 * n + 1)``. ``name`` is accepted but not used.
    """
    filter_width = 2 * n + 1
    operand = C.placeholder(name='lrn_arg')
    operand_sq = C.square(operand)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
    # Note that Python axis order and BrainScript axis order are reversed.
    operand_sq_expanded = C.reshape(operand_sq, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / filter_width, (1, filter_width, 1, 1), name='W')
    # 3D convolution with a filter that has a non 1-size only in the 3rd axis; it does not
    # reduce since the reduction dimension is fake and 1
    neighbourhood = C.convolution(W, operand_sq_expanded)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(neighbourhood, C.InferredDimension, 0, 2)
    scale = C.exp(beta * C.log(k + b))
    return C.element_divide(operand, scale)
示例#21
0
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    """Build local response normalization as a function of a placeholder.

    Output is ``x / exp(beta * log(k + b))`` with ``b`` the convolution of
    ``x**2`` with a constant filter whose ``2 * n + 1`` entries all equal
    ``alpha / (2 * n + 1)``. ``name`` is accepted but not used.
    """
    span = 2 * n + 1
    x = C.placeholder(name='lrn_arg')

    # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
    # Note that Python axis order and BrainScript axis order are reversed.
    squared = C.reshape(C.square(x), (1, C.InferredDimension), 0, 1)

    # 3D convolution with a filter that has a non 1-size only in the 3rd axis; it does not
    # reduce since the reduction dimension is fake and 1
    W = C.constant(alpha / span, (1, span, 1, 1), name='W')
    windowed = C.convolution(W, squared)

    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(windowed, C.InferredDimension, 0, 2)

    return C.element_divide(x, C.exp(beta * C.log(k + b)))
示例#22
0
def binary_focal_loss(output, target, alpha=1., gamma=2., name=''):
    """
    CNTK binary-class implementation of focal loss from "Focal Loss for Dense
    Object Detection" by Tsung-Yi Lin et al.

    Focal loss adds a factor (1 - p) ^ gamma to the standard cross entropy
    criterion. Setting gamma > 0 reduces the relative loss for well-classified
    examples (p > .5), putting more focus on hard, misclassified examples.
    Focal loss enables the training of highly accurate dense object detectors
    in the presence of vast numbers of easy background examples or datasets
    with extreme class imbalance (e.g. 1:1000).

    This implementation also works for semantic segmentation of images, i.e.
    output can be a rank 2 tensor of shape (row, col). Output will be correct
    even in the edge case where the entire image is background.

    Maths:
        Focal Loss = - alpha * (1 - p) ^ gamma * log ( p )

    Arguments:
        output: the computed posterior probability from the network (typ. a
          ``sigmoid``). Can range from shape (1,) for simple classification up
          to shape (row, col) for semantic segmentation of images.
        target: ground-truth label, 0 or 1
        alpha (float): scaling factor; weight assigned to rare classes.
          Should slightly decrease as gamma increases. (Default 1)
        gamma (float): larger gamma reduces relative loss for well-classified
          examples. Recommended range [0.5, 5] (Default 2.)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    """
    # Log-likelihood of each class, masked by the label.
    positive_log = target * C.log(output)
    negative_log = (1 - target) * C.log(1 - output)

    # Modulating factors that down-weight confident predictions.
    positive_weight = C.pow(1 - output, gamma)
    negative_weight = C.pow(output, gamma)

    weighted = positive_weight * positive_log + negative_weight * negative_log
    return C.negate(alpha * weighted, name=name)
示例#23
0
def multivariate_kl_divergence(input_layer):
    """KL-divergence expression between batch statistics and a standard normal.

    The batch mean and covariance of ``input_layer`` (distribution 1) are
    compared with a zero mean and identity covariance (distribution 2) using
    0.5 * (log(det S2 / det S1) - dim + tr(S2^-1 S1) + d^T S2^-1 d), where
    d = mu2 - mu1.
    """
    dim = input_layer.shape[0]

    # Statistics of the incoming batch.
    batch_values = C.unpack_batch(input_layer)
    mu1 = C.transpose(C.reduce_mean(batch_values, axis=0), [1, 0])
    sigma1 = C.cov2(input_layer)

    # Reference distribution: zero mean, identity covariance.
    mu2 = C.zeros_like(mu1)
    sigma2 = C.Constant(np.eye(dim))
    sigma2_inv = sigma2  # the identity matrix is its own inverse

    log_det_ratio = C.log(C.det(sigma2) / C.det(sigma1))
    trace_term = C.trace(sigma2_inv @ sigma1)
    mean_term = C.transpose((mu2 - mu1), [1, 0]) @ sigma2_inv @ (mu2 - mu1)

    return 0.5 * (log_det_ratio - dim + trace_term + mean_term)
示例#24
0
def cross_entropy_with_sampled_softmax(
    hidden_vector,           # Node providing the output of the recurrent layers
    target_vector,           # Node providing the expected labels (as sparse vectors)
    vocab_dim,               # Vocabulary size
    hidden_dim,              # Dimension of the hidden vector
    num_samples,             # Number of samples to use for sampled softmax
    sampling_weights,        # Node providing weights to be used for the weighted sampling
    allow_duplicates = False # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
    ):
    """Build sampled-softmax cross entropy together with its output layer.

    Creates the bias and weight parameters of the output layer, draws
    ``num_samples`` classes according to ``sampling_weights`` and scores the
    true label against them. The module-level ``use_sparse`` flag controls
    whether the sample selector is kept sparse. Two shapes are printed while
    the graph is constructed.

    Returns:
        tuple: ``(z, cross_entropy_on_samples, error_on_samples)``; ``z`` is
        the input for a full softmax, reshaped to ``vocab_dim``.
    """
    bias = C.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.Parameter(shape=(vocab_dim, hidden_dim), init=C.initializer.glorot_uniform())

    # sparse matrix [num_samples * vocab_size]
    selector_sparse = C.random_sample(sampling_weights, num_samples, allow_duplicates)
    if use_sparse:
        sample_selector = selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the identity matrix below might be quite large.
        # If a dense representation is wanted for all data, the sample selector has to be converted.
        eye = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(selector_sparse, eye)

    # dense row [1 * vocab_size]
    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates)
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    w_sampled = C.times(sample_selector, weights, name='wS')  # [num_samples * hidden_dim]
    print("ws:" + str(w_sampled.shape))

    # Prior-corrected logits of the sampled classes. [num_samples]
    z_sampled = (C.times_transpose(w_sampled, hidden_vector, name='zS1')
                 + C.times(sample_selector, bias, name='zS2')
                 - C.times_transpose(sample_selector, log_prior, name='zS3'))

    # Getting the weight vector for the true label. Dimension hidden_dim
    w_target = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    z_target = (C.times_transpose(w_target, hidden_vector, name='zT1')
                + C.times(target_vector, bias, name='zT2')
                - C.times_transpose(target_vector, log_prior, name='zT3'))  # [1]

    z_sampled_lse = C.reduce_log_sum_exp(z_sampled)

    # Compute the cross entropy that is used for training.
    # There is no check whether a sampled class coincides with the true label,
    # so the true class may be counted twice in the normalizing denominator
    # of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(z_target, z_sampled_lse) - z_target

    # For applying the model, also output a node providing the input for the full softmax.
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=vocab_dim)

    z_sampled_max = C.reduce_max(z_sampled)
    error_on_samples = C.less(z_target, z_sampled_max)
    return (z, cross_entropy_on_samples, error_on_samples)
示例#25
0
def test_data_resize():
    """Train one minibatch through a graph that reshapes an unpacked batch.

    The softmax output is unpacked from the batch axis and reshaped to a flat
    vector of ``batch_size * 2`` entries before the loss is taken.
    """
    batch_size = 8
    w = C.parameter(shape=(3, 2), name='w1')
    x = C.input_variable(shape=[3], name='x')

    probs = C.softmax(C.times(x, w))
    unpacked = C.unpack_batch(probs)
    y = C.reshape(unpacked, [batch_size * 2])
    loss = C.reduce_mean(-C.log(y))

    lr_schedule = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
    learner = C.sgd(y.parameters, lr_schedule, gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(y, loss, [learner])

    features = np.random.randn(batch_size, 3)
    trainer.train_minibatch({x: features})
示例#26
0
def test_data_resize():
    """Smoke-test training when the batch axis is unpacked and reshaped.

    Builds ``softmax(x * w)``, converts the batch axis into a static axis
    via ``C.unpack_batch``, flattens it to ``batch_size * 2`` values and
    runs a single SGD step. Success means no exception is raised.
    """
    batch_size = 8

    w1 = C.parameter(shape=(3, 2), name='w1')
    inp = C.input_variable(shape=[3], name='x')
    unpacked = C.unpack_batch(C.softmax(C.times(inp, w1)))
    flattened = C.reshape(unpacked, [batch_size * 2])
    loss = C.reduce_mean(-C.log(flattened))

    lr_schedule = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
    sgd_learner = C.sgd(flattened.parameters, lr_schedule,
                        gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(flattened, loss, [sgd_learner])

    # Random features only; the graph has no separate label input.
    minibatch = {inp: np.random.randn(batch_size, 3)}
    trainer.train_minibatch(minibatch)
    def MyDNNLayer(hidden_size=128, num_layers=2):
        """Return a stack of `num_layers` identical feed-forward blocks.

        Each block is Dense(hidden_size) -> BatchNormalization -> sigmoid
        -> Dropout(0.3), composed with CNTK's `>>` operator.

        Args:
            hidden_size: output dimension of every Dense layer.
            num_layers: number of blocks in the stack.

        Returns:
            A `C.layers.Sequential` that can be applied to an input.
        """
        # The body must be indented below the `def`; at the same column as
        # the header it is a syntax error.
        return C.layers.Sequential([
            C.layers.For(
                range(num_layers),
                lambda: C.layers.Dense(hidden_size)
                >> C.layers.BatchNormalization()
                >> C.sigmoid
                >> C.layers.Dropout(.3))
        ])
    
    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        """Return a function that applies a bidirectional LSTM stack.

        Args:
            hidden_size: hidden size passed to `C.optimized_rnnstack`.
            num_layers: number of stacked recurrent layers.

        Returns:
            A one-argument callable mapping an operand to the output of
            `C.optimized_rnnstack` run with `recurrent_op='lstm'` and
            `bidirectional=True`.
        """
        # One parameter tensor holds the weights of the whole stack; its
        # first dimension is left for CNTK to infer.
        W = C.Parameter(
            (C.InferredDimension, hidden_size),
            init=C.he_normal(1.0),
            name='rnn_parameters',
        )

        def _func(operand):
            # `operand` is the input the returned layer is applied to.
            return C.optimized_rnnstack(
                operand,
                weights=W,
                hidden_size=hidden_size,
                num_layers=num_layers,
                bidirectional=True,
                recurrent_op='lstm',
            )

        return _func

    # Input variables denoting the features and label data
                                                  #shape of input data
    feature_var = C.sequence.input_variable(feature_dim * (1+context[0]+context[1]))  #It creates an input in the network: a place where data, such as features and labels, should be provided.
    label_var = C.sequence.input_variable(num_classes)
                                                                                           ###1st layer
    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)  #feature_var is operand in _fun in MyMeanVarNorm function
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)     #Computes the element-wise the natural logarithm of  label_prior

    if (model_type=="DNN"):
        net = MyDNNLayer(512,4)(feature_norm)              ###########
    elif (model_type=="BLSTM"):
        net = MyBLSTMLayer(512,3)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")
                                              #initial value of weights W            #'C.he_normal'-->initializer for Parameter initialized to Gaussian distribution with mean 0 and standard deviation scale *....
    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)   #####last layer in any network of both NN

    ##### loss and metric##
    ce = C.cross_entropy_with_softmax(out, label_var)      #loss function  ((objective function))
    pe = C.classification_error(out, label_var)    ###for evaluation
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)     #print number of parameters in the whole model
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net # adding last hidden layer output for future use in CTC tutorial
    }
示例#28
0
def focal_loss_with_softmax(output_vector, target_vector, alpha=1, gamma=2., axis=-1, name=''):
    """
    Multi-class focal loss for CNTK, following "Focal Loss for Dense Object
    Detection" by Tsung-Yi Lin et al.

    Focal loss multiplies the standard cross-entropy term by (1 - p) ^ gamma.
    With gamma > 0 the relative loss of well-classified examples (p > .5) is
    reduced, so training focuses on hard, misclassified examples. This helps
    when easy background examples vastly outnumber the rest or when classes
    are extremely imbalanced (e.g. 1:1000).

    The softmax and the reduction are both taken along `axis`, so the inputs
    may carry extra axes, e.g. shape (num_classes, row, col) for semantic
    segmentation of images.

    Maths:
        Focal Loss = - alpha * (1 - p) ^ gamma * log ( p )

    Example:
        Cx.focal_loss_with_softmax([[0, 0, 0.8, 0.2]], [[0, 0, 1, 0]]).eval()
        array([[0.31306446]], dtype=float32)

    Arguments:
        output_vector: the unscaled output values computed by the network,
          from shape (num_classes,) for classification up to shape
          (num_classes, row, col) for semantic segmentation of images.
        target_vector: usually a one-hot vector whose hot bit marks the label
          index, but it can be any probability distribution over the labels.
        alpha (float): scaling factor; weight assigned to rare classes.
          Should decrease slightly as gamma increases. (Default 1)
        gamma (float): larger gamma reduces the relative loss of
          well-classified examples. Recommended range [0.5, 5]. (Default 2.)
        axis (int or :class:`~cntk.axis.Axis`, optional): axis along which
          the softmax and the sum are computed.
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    """
    probabilities = C.softmax(output_vector, axis=axis)

    # Target-weighted log-probabilities: the un-negated cross-entropy terms.
    weighted_log_prob = target_vector * C.log(probabilities)

    # Modulating factor (1 - p) ^ gamma that down-weights easy examples.
    modulation = C.pow(1 - probabilities, gamma)

    summed = C.reduce_sum(modulation * weighted_log_prob, axis=axis)
    return C.negate(alpha * summed, name=name)
示例#29
0
def cross_entropy_with_sampled_softmax(
        hidden_vector,
        label_vector,
        vocab_dim,
        hidden_dim,
        num_samples,
        sampling_weights,
        allow_duplicates=False):
    """Build a sampled-softmax cross-entropy criterion plus a full-softmax input.

    Creates a bias parameter of shape (vocab_dim, 1) and a weight parameter
    of shape (vocab_dim, hidden_dim), draws `num_samples` classes with
    `C.random_sample`, and corrects the logits of both the sampled classes
    and the true class by the log of the sampling inclusion frequency.

    Args:
        hidden_vector: network output that is multiplied with the weights.
        label_vector: selector for the true class (multiplied with the
            weights, bias and log prior the same way as the sample selector).
        vocab_dim: number of classes.
        hidden_dim: dimension of `hidden_vector`.
        num_samples: number of classes to sample.
        sampling_weights: weights passed to `C.random_sample`.
        allow_duplicates: forwarded to the sampling functions.

    Returns:
        Tuple `(z, cross_entropy_on_samples, error_on_samples)`: the logits
        for the full softmax reshaped to `vocab_dim`, the sampled training
        criterion, and a node that is true when the target logit is below
        the largest sampled logit.
    """
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim),
                                 init=C.initializer.glorot_uniform())

    # Selector for the randomly drawn classes and the matching log prior.
    sample_selector = C.random_sample(sampling_weights, num_samples, allow_duplicates)
    inclusion_probs = C.random_sample_inclusion_frequency(
        sampling_weights, num_samples, allow_duplicates)
    log_prior = C.log(inclusion_probs)

    # Corrected logits of the sampled classes.
    sampled_weights = C.times(sample_selector, weights, name='wS')
    sampled_dot = C.times_transpose(sampled_weights, hidden_vector, name='zS1')
    sampled_bias = C.times(sample_selector, bias, name='zS2')
    sampled_biased = sampled_dot + sampled_bias
    sampled_prior = C.times_transpose(sample_selector, log_prior, name='zS3')
    sampled_logits = sampled_biased - sampled_prior

    # Corrected logit of the true label, built from its own weight vector.
    target_weights = C.times(label_vector, weights, name='wT')
    target_dot = C.times_transpose(target_weights, hidden_vector, name='zT1')
    target_bias = C.times(label_vector, bias, name='zT2')
    target_biased = target_dot + target_bias
    target_prior = C.times_transpose(label_vector, log_prior, name='zT3')
    target_logit = target_biased - target_prior

    sampled_log_sum = C.reduce_log_sum_exp(sampled_logits)

    # Cross entropy used for training.
    cross_entropy_on_samples = C.log_add_exp(target_logit, sampled_log_sum) - target_logit

    # For applying the model, also output the input of the full softmax.
    full_logits = C.times_transpose(weights, hidden_vector) + bias
    full_logits = C.reshape(full_logits, shape=vocab_dim)

    sampled_max = C.reduce_max(sampled_logits)
    error_on_samples = C.less(target_logit, sampled_max)

    return (full_logits, cross_entropy_on_samples, error_on_samples)
示例#30
0
def log(x, name=''):
    '''
    Computes the element-wise natural logarithm of `x`.

    Example:
        >>> C.eval(C.log([1., 2.]))
        [array([[ 0.      ,  0.69314718056]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`

    Note:
        CNTK returns -85.1 for log(x) if `x` is negative or zero, because it
        uses 1e-37 (whose natural logarithm is -85.1) as the smallest float
        number for `log`; this is the only precision guaranteed across
        platforms. This will be changed to return `NaN` and `-inf`.
    '''
    # Imported under an alias so it does not hide this wrapper's own name.
    from cntk import log as _cntk_log
    sanitized = sanitize_input(x)
    node = _cntk_log(sanitized, name)
    return node.output()
示例#31
0
def log(x, name=''):
    '''
    Element-wise natural logarithm of `x`.

    Example:
        >>> C.eval(C.log([1., 2.]))
        [array([[ 0.      ,  0.69314718056]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`

    Note:
        For negative or zero `x`, CNTK returns -85.1. It treats 1e-37 (whose
        natural logarithm is -85.1) as the smallest float number for `log`,
        since that is the only precision guaranteed across platforms. This
        will be changed to return `NaN` and `-inf`.
    '''
    # Local import; the alias keeps the imported op distinct from this function.
    from cntk import log as cntk_log_op
    return cntk_log_op(sanitize_input(x), name).output()
示例#32
0
    def _create_model(self, input_dim, output_dim, hidden_dims):
        """Build the softmax network, its reward-weighted loss and a trainer.

        Args:
            input_dim: dimension of the 'state' input variable.
            output_dim: number of outputs of the final softmax layer; also
                the dimension of the 'action_onehot' input.
            hidden_dims: iterable of hidden layer widths; one ReLU Dense
                layer is created per entry, in order.

        Returns:
            Tuple `(model, loss, trainer)`.
        """
        state_in = C.input_variable(input_dim, name='state')

        # Hidden ReLU layers followed by a softmax output layer.
        network = state_in
        for width in hidden_dims:
            network = C.layers.Dense(width, activation=C.relu)(network)
        network = C.layers.Dense(output_dim, activation=C.softmax)(network)

        action_onehot_in = C.input_variable(output_dim, name='action_onehot')
        reward_in = C.input_variable(1, name='reward')

        # Probability of the one-hot selected action, then the mean of
        # -log(prob) * reward.
        selected_prob = C.reduce_sum(network * action_onehot_in)
        loss = C.reduce_mean(-C.log(selected_prob) * reward_in)

        schedule = C.learning_parameter_schedule(1e-2)
        optimizer = C.adam(network.parameters, schedule,
                           C.momentum_schedule(0.9))
        trainer = C.Trainer(network, (loss, None), optimizer)

        return network, loss, trainer
示例#33
0
def train_and_test(reader_train, reader_test, model_func):
    """Train `model_func` with the input as its own label and report errors.

    The label variable is fed from the same stream as the input, scaled by
    1/255 and compared with the model output through an element-wise binary
    cross-entropy loss. `input_dim` and `isFast` are read from the enclosing
    module scope.

    Args:
        reader_train: minibatch source for training; must provide
            `streams.features` and `next_minibatch(size, input_map=...)`.
        reader_test: minibatch source for testing, with the same interface.
        model_func: callable mapping the CNTK input variable to the model
            output.

    Returns:
        Tuple `(model, train_error, test_error)`; both errors are averages
        multiplied by 100 (percentages).
    """

    ###############################################
    # Training the model
    ###############################################

    # Instantiate the input and the label variables.
    # (Named `input_var` so the builtin `input` is not shadowed.)
    input_var = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    # Create the model function
    model = model_func(input_var)

    # The labels for this network is same as the input MNIST image.
    # Note: Inside the model we are scaling the input to 0-1 range
    # Hence we rescale the label to the same range
    # We show how one can use their custom loss function
    # loss = -(y* log(p)+ (1-y) * log(1-p)) where p = model output and y = target
    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    # training config
    epoch_size = 30000  # 30000 samples is half the dataset size
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    # Instantiate the trainer object to drive the model training
    lr_per_sample = [0.00003]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    # Momentum which is applied on every minibatch_size = 64 samples
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    # We use a variant of the Adam optimizer which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    # Instantiate the trainer
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Map the data streams to the input and labels.
    # Note: for autoencoders input == label
    input_map = {
        input_var: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for _ in range(num_minibatches_to_train):
        # Read a mini batch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Run the trainer on and perform model training
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100.0) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from a training data
    #############################################################################

    # Test data for trained model
    test_minibatch_size = 32
    num_samples = 10000
    # Whole minibatches only; floor division gives the same count as
    # truncating the float quotient.
    num_minibatches_to_test = num_samples // test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input_var: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for _ in range(num_minibatches_to_test):

        # We are loading test data in batches specified by test_minibatch_size
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)

        # Evaluate the trained model on this test minibatch
        eval_error = trainer.test_minibatch(data)

        # Accumulate the per-minibatch error weighted by the minibatch size
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    # Average of evaluation errors of all test minibatches
    test_error = (metric_numer * 100.0) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
示例#34
0
    #
    z = C.input_variable(shape=(z_dim, ), dtype="float32", needs_gradient=True)
    x = C.input_variable(shape=(img_channel, img_height, img_width),
                         dtype="float32",
                         needs_gradient=True)
    x_real = (x - 127.5) / 127.5

    G_fake = dcgan_generator(z)
    D_real = dcgan_discriminator(x_real)
    D_fake = D_real.clone(method="share",
                          substitutions={x_real.output: G_fake.output})

    #
    # loss function
    #
    G_loss = -C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    #
    # optimizer
    #
    G_learner = C.adam(G_fake.parameters,
                       lr=C.learning_parameter_schedule_per_sample(1e-4),
                       momentum=0.5,
                       gradient_clipping_threshold_per_sample=minibatch_size,
                       gradient_clipping_with_truncation=True)
    D_learner = C.adam(D_real.parameters,
                       lr=C.learning_parameter_schedule_per_sample(1e-4),
                       momentum=0.5,
                       gradient_clipping_threshold_per_sample=minibatch_size,
                       gradient_clipping_with_truncation=True)
示例#35
0
def crossentropy(y, t):
    """Return the negated batch mean of log(sum(y * t)) taken over axis 0.

    The element-wise product `y * t` is summed along axis 0 and that axis is
    squeezed away; the log of the result is unpacked from the batch axis and
    averaged.
    """
    selected = C.reduce_sum(y * t, axis=0)
    prob = C.squeeze(selected, 0)
    log_prob = C.log(prob)
    return -C.reduce_mean(C.unpack_batch(log_prob))
示例#36
0
def test_Log(tmpdir):
    """Build a `C.log` node over a constant float32 array and verify it as 'Log_0'."""
    values = np.asarray([1., 2.], dtype=np.float32)
    verify_no_input(C.log(values), tmpdir, 'Log_0')
示例#37
0
def seq_loss(logits, y):
    """Return -log of the last gathered element of the sequence softmax.

    `C.sequence.softmax` is applied to `logits`, the steps selected by `y`
    are kept with `C.sequence.gather`, and the last of them is used.
    """
    seq_prob = C.sequence.softmax(logits)
    gathered = C.sequence.gather(seq_prob, y)
    last_prob = C.sequence.last(gathered)
    return -C.log(last_prob)
示例#38
0
def train_and_test(reader_train, reader_test, model_func):
    """Train `model_func` with the input as its own label and report errors.

    The label variable is fed from the same stream as the input, scaled by
    1/255 and compared with the model output through an element-wise binary
    cross-entropy loss. `input_dim` and `isFast` are read from the enclosing
    module scope.

    Args:
        reader_train: minibatch source for training; must provide
            `streams.features` and `next_minibatch(size, input_map=...)`.
        reader_test: minibatch source for testing, with the same interface.
        model_func: callable mapping the CNTK input variable to the model
            output.

    Returns:
        Tuple `(model, train_error, test_error)`; both errors are averages
        multiplied by 100 (percentages).
    """

    ###############################
    # Training the model
    ###############################

    # Named `input_var` so the builtin `input` is not shadowed.
    input_var = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    model = model_func(input_var)

    # Binary cross-entropy between the model output and the label rescaled
    # to the 0-1 range.
    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    epoch_size = 30000
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    lr_per_sample = [3e-4]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Input and label are both mapped to the features stream.
    input_map = {
        input_var: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for _ in range(num_minibatches_to_train):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from a training data
    #############################################################################

    test_minibatch_size = 32
    num_samples = 10000
    # Whole minibatches only; floor division gives the same count as
    # truncating the float quotient.
    num_minibatches_to_test = num_samples // test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input_var: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for _ in range(num_minibatches_to_test):
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        # Weight each minibatch's error by the minibatch size.
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size
    test_error = (metric_numer * 100) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
示例#39
0
def test_Log(tmpdir):
    """Check a `C.log` node built from a constant float32 array, under the name 'Log_0'."""
    log_node = C.log(np.asarray([1., 2.], dtype=np.float32))
    verify_no_input(log_node, tmpdir, 'Log_0')
示例#40
0
def test_Log(tmpdir, dtype):
    """Verify a `C.log` node over a constant array for the given `dtype`.

    The node is built inside `C.default_options(dtype=dtype)` and checked
    with `verify_no_input` under the name 'Log_0'.
    """
    with C.default_options(dtype=dtype):
        values = np.asarray([1., 2.], dtype=dtype)
        verify_no_input(C.log(values), tmpdir, 'Log_0')