def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
    # mix a raw Theano op in between graph-built layers
    l1_o = .999 * l1_o
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict,
                           [X, y], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
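# early_stopping_trainer tracks the validation cost and keeps the best model
# in checkpoint_dict. A minimal sketch of the core bookkeeping, assuming a
# standard patience rule; this is illustrative, not the library's code:
def early_stopping_sketch(valid_costs, patience=5):
    best, since_best = float("inf"), 0
    for epoch, v in enumerate(valid_costs):
        if v < best:
            best, since_best = v, 0  # a checkpoint would be saved here
        else:
            since_best += 1
        if since_best >= patience:
            return epoch  # stop: no improvement for `patience` epochs
    return len(valid_costs) - 1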
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)
    shifted_y_sym = shift_layer([y_sym], graph, 'shift')
    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)
    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state=random_state)
    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)
    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X, y], minibatch_size,
                           list_of_minibatch_functions=[text_minibatch_func],
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
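# A minimal numpy sketch of what masked_cost is assumed to do: zero out
# per-timestep losses wherever the mask is 0, so padded timesteps do not
# contribute to the cost. The library's exact internals may differ.
import numpy as np

def masked_cost_sketch(stepwise_cost, mask):
    # stepwise_cost and mask are both (n_timesteps, minibatch_size)
    return stepwise_cost * mask

_cost = np.array([[1.0, 2.0], [3.0, 4.0]])
_mask = np.array([[1.0, 1.0], [1.0, 0.0]])
# the padded step (mask == 0) drops out of the total
assert masked_cost_sketch(_cost, _mask).sum() == 6.0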
def test_feedforward_classifier():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, "l1", proj_dim=20,
                        random_state=random_state)
    y_pred = softmax_layer([l1_o], graph, "pred", n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")
    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
def test_feedforward_theano_mix():
    del_shared()
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    X_sym = tensor.fmatrix()
    y_sym = tensor.fmatrix()
    l1_o = linear([X_sym], [X.shape[1]], proj_dim=20, name='l1',
                  random_state=random_state)
    # mix a raw Theano op in between library layers
    l1_o = .999 * l1_o
    y_pred = softmax([l1_o], [20], proj_dim=n_classes, name='out',
                     random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params = list(get_params().values())
    grads = theano.grad(cost, params)
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    train_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    X_train, y_train = next(train_itr)
    X_valid, y_valid = next(valid_itr)
    fit_function(X_train, y_train)
    cost_function(X_valid, y_valid)
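# get_params() and del_shared() imply a module-level registry of Theano
# shared variables keyed by layer name, cleared between tests. A minimal
# sketch of that pattern; _SHARED_REGISTRY and make_shared are hypothetical
# names for illustration, not the library's API.
from collections import OrderedDict
import theano

_SHARED_REGISTRY = OrderedDict()

def make_shared(name, value):
    # create-or-fetch, so repeated layer calls reuse the same parameter
    if name not in _SHARED_REGISTRY:
        _SHARED_REGISTRY[name] = theano.shared(value, name=name)
    return _SHARED_REGISTRY[name]

def get_params_sketch():
    return _SHARED_REGISTRY

def del_shared_sketch():
    # tests call this first so each test builds its parameters from scratch
    _SHARED_REGISTRY.clear()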
# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])
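# With the compiled functions above, a plain minibatch loop is enough to
# train. A minimal sketch, assuming X and y are index-aligned numpy arrays;
# run_epoch_sketch is an illustrative helper, not library API.
def run_epoch_sketch(X, y, minibatch_size, fit_function):
    epoch_costs = []
    for i in range(0, len(X), minibatch_size):
        mb_cost, = fit_function(X[i:i + minibatch_size],
                                y[i:i + minibatch_size])
        epoch_costs.append(mb_cost)
    return epoch_costs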
l1 = embed([X_sym], n_classes, n_emb, name="emb", random_state=random_state)
in_fork = lstm_fork([l1], [n_emb], n_hid, name="h1",
                    random_state=random_state)

def step(in_t, h_tm1):
    h_t = lstm(in_t, h_tm1, [n_hid], n_hid, name="lstm_l1",
               random_state=random_state)
    return h_t

h, _ = theano.scan(step, sequences=[in_fork], outputs_info=[h0])
h_o = slice_state(h, n_hid)

y_pred = softmax([h_o], [n_hid], n_classes, name="h2",
                 random_state=random_state)
loss = categorical_crossentropy(y_pred, y_sym)
cost = loss.mean(axis=1).sum(axis=0)

params = list(get_params().values())
grads = tensor.grad(cost, params)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym, h0], [cost, h], updates=updates)
cost_function = theano.function([X_sym, y_sym, h0], [cost, h])
predict_function = theano.function([X_sym, h0], [y_pred, h])
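# fit_function returns the scanned states along with the cost, so the final
# state can seed the next call and recurrent state persists across
# minibatches. A minimal sketch; assumes X_mbs / y_mbs are lists of
# (time, batch, ...) minibatches and h0_val matches the shape given for h0.
def run_stateful_epoch_sketch(X_mbs, y_mbs, h0_val, fit_function):
    for X_mb, y_mb in zip(X_mbs, y_mbs):
        cost, h = fit_function(X_mb, y_mb, h0_val)
        # carry the final timestep's state into the next minibatch
        h0_val = h[-1]
    return cost, h0_val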
def test_categorical_crossentropy():
    graph = OrderedDict()
    y_sym = add_datasets_to_graph([y], ["y"], graph)
    cost = categorical_crossentropy(.99 * y_sym + .001, y_sym)
    theano.function([y_sym], cost, mode="FAST_COMPILE")
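# For one-hot targets, categorical crossentropy is -sum(y * log(p)) per
# example; the .99 * y_sym + .001 trick above keeps log() away from zero.
# A small numpy check of the formula itself (not the library implementation):
import numpy as np

_y = np.array([[0., 1.], [1., 0.]])
_p = .99 * _y + .001
_xent = -np.sum(_y * np.log(_p), axis=1)
# cost is near zero because predictions nearly match the one-hot targets
assert np.allclose(_xent, -np.log(0.991))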
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])
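# predict_function returns class probabilities, so accuracy on a minibatch
# is an argmax comparison against the one-hot targets. A minimal sketch;
# accuracy_sketch is an illustrative helper, not library API.
import numpy as np

def accuracy_sketch(X_mb, y_mb, predict_function):
    y_hat = predict_function(X_mb)[0]
    return np.mean(np.argmax(y_hat, axis=1) == np.argmax(y_mb, axis=1))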
def test_loop():
    # graph holds information necessary to build layers from parents
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    minibatch_size = 10

    y_pred = softmax_zeros_layer([X_sym], graph, "y_pred",
                                 proj_dim=n_targets)
    nll = categorical_crossentropy(y_pred, y_sym).mean()
    cost = nll

    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.13
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function([X_sym, y_sym], [cost])
    predict_function = theano.function([X_sym], [y_pred])

    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "predict_function": predict_function,
    }

    def error(*args):
        xargs = args[:-1]
        y = args[-1]
        final_args = xargs
        y_pred = predict_function(*final_args)[0]
        return 1 - np.mean((np.argmax(y_pred, axis=1).ravel()) ==
                           (np.argmax(y, axis=1).ravel()))

    TL1 = TrainingLoop(fit_function, error,
                       train_indices[:10], valid_indices[:10],
                       minibatch_size,
                       checkpoint_dict=checkpoint_dict,
                       list_of_train_output_names=["train_cost"],
                       valid_output_name="valid_error",
                       n_epochs=1,
                       optimizer_object=opt)
    epoch_results1 = TL1.run([X, y])

    # resume the same loop on the next slice of indices
    TL1.train_indices = train_indices[10:20]
    TL1.valid_indices = valid_indices[10:20]
    epoch_results1 = TL1.run([X, y])

    # a second loop covering all 20 indices, sharing the checkpoint_dict
    TL2 = TrainingLoop(fit_function, error,
                       train_indices[:20], valid_indices[:20],
                       minibatch_size,
                       checkpoint_dict=checkpoint_dict,
                       list_of_train_output_names=["train_cost"],
                       valid_output_name="valid_error",
                       n_epochs=1,
                       optimizer_object=opt)
    epoch_results2 = TL2.run([X, y])

    r1 = TL1.__dict__["checkpoint_dict"]["previous_results"]["train_cost"][-1]
    r2 = TL2.__dict__["checkpoint_dict"]["previous_results"]["train_cost"][-1]
    assert r1 == r2
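# The assert above appears to check that pausing and resuming a loop behaves
# like one continuous run. The same property in miniature, with plain numpy
# SGD on a linear least-squares model (all names here are illustrative):
import numpy as np

def sgd_run(w, Xs, ys, lr=0.1):
    for x_i, y_i in zip(Xs, ys):
        w = w - lr * (w.dot(x_i) - y_i) * x_i  # squared-error gradient step
    return w

rng = np.random.RandomState(1999)
Xd = rng.randn(20, 3)
yd = rng.randn(20)
w0 = np.zeros(3)
# train on the first 10 examples, then resume on the next 10
w_split = sgd_run(sgd_run(w0, Xd[:10], yd[:10]), Xd[10:], yd[10:])
# one continuous run over all 20 examples
w_joint = sgd_run(w0, Xd, yd)
assert np.allclose(w_split, w_joint)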