Example #1
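Builds a one-layer LSTM regressor: an lstm_fork input projection, a theano.scan over time steps, and a linear readout trained by SGD on a summed squared error. X, y, X_sym, y_sym, and minibatch_size appear to be module-level fixtures.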
def run_lstm():
    del_shared()
    n_in = X.shape[-1]
    n_hid = 20
    n_out = y.shape[-1]

    random_state = np.random.RandomState(42)
    # the recurrent state holds hidden and cell parts concatenated, hence 2 * n_hid
    h_init = np.zeros((minibatch_size, 2 * n_hid)).astype("float32")

    h0 = tensor.fmatrix()

    l1 = lstm_fork([X_sym], [n_in], n_hid, name="l1",
                    random_state=random_state)

    def step(in_t, h_tm1):
        h_t = lstm(in_t, h_tm1, n_hid, name="rec", random_state=random_state)
        return h_t

    h, _ = theano.scan(step, sequences=[l1], outputs_info=[h0])
    h_o = slice_state(h, n_hid)  # slice the n_hid hidden units out of the combined state

    pred = linear([h_o], [n_hid], n_out, name="l2", random_state=random_state)
    cost = ((y_sym - pred) ** 2).sum()
    params = list(get_params().values())

    grads = tensor.grad(cost, params)
    learning_rate = 1e-12  # effectively zero
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    f = theano.function([X_sym, y_sym, h0], [cost, h], updates=updates,
                        mode="FAST_COMPILE")
    f(X, y, h_init)
Example #2
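A small feedforward classifier that mixes graph-built layers with a raw Theano expression, then trains for one epoch through early_stopping_trainer. X, y, and n_classes appear to be module-level fixtures.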
def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
    l1_o = .999 * l1_o  # mix a raw Theano expression into the layer graph
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
Example #3
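A tanh RNN over masked, time-major sequences: a linear input projection, tanh_recurrent_layer, and a linear readout, with the squared error masked before averaging. The minibatch indices run over X.shape[1] because the first axis is time; minibatch_size appears to be a module-level fixture.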
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid, random_state)

    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out, random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.001
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function,
                           cost_function,
                           checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices,
                           valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
Example #4
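Exercises fixed_projection_layer plain, with a pre bias, and with a post bias, then verifies that the fixed basis is unchanged by SGD updates while the downstream linear_layer moves.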
def test_fixed_projection_layer():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)

    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    out = fixed_projection_layer([X_sym], rand_projection, graph, 'proj')
    out2 = fixed_projection_layer([X_sym],
                                  rand_projection,
                                  graph,
                                  'proj',
                                  pre=rand_projection[:, 0])
    out3 = fixed_projection_layer([X_sym],
                                  rand_projection,
                                  graph,
                                  'proj',
                                  post=rand_projection[0])
    final = linear_layer([out2],
                         graph,
                         'linear',
                         17,
                         random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final], mode="FAST_COMPILE")

    # Test updates
    params, grads = get_params_and_grads(graph, final.mean())
    opt = sgd(params)
    updates = opt.updates(params, grads, .1)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)

    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)

    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
Example #5
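The same feedforward/Theano mix as Example #2, written against the explicit-parameter API (linear, softmax, get_params, minibatch_iterator rather than a graph dict) and run on a single train and validation minibatch.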
def test_feedforward_theano_mix():
    del_shared()
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    X_sym = tensor.fmatrix()
    y_sym = tensor.fmatrix()

    l1_o = linear([X_sym], [X.shape[1]],
                  proj_dim=20,
                  name='l1',
                  random_state=random_state)
    l1_o = .999 * l1_o
    y_pred = softmax([l1_o], [20],
                     proj_dim=n_classes,
                     name='out',
                     random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params = list(get_params().values())
    grads = theano.grad(cost, params)
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    train_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    X_train, y_train = next(train_itr)
    X_valid, y_valid = next(valid_itr)
    fit_function(X_train, y_train)
    cost_function(X_valid, y_valid)
Example #6
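The fixed-projection checks of Example #4 in the explicit-parameter API, with distinct layer names and gradients taken via tensor.grad. X and X_sym appear to be module-level fixtures.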
def test_fixed_projection():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)
    rand_dim = rand_projection.shape[1]

    out = fixed_projection([X_sym], [X.shape[1]], rand_projection, 'proj1')
    out2 = fixed_projection([X_sym], [X.shape[1]],
                            rand_projection,
                            'proj2',
                            pre=rand_projection[:, 0])
    out3 = fixed_projection([X_sym], [X.shape[1]],
                            rand_projection,
                            'proj3',
                            post=rand_projection[0])
    final = linear([out2], [rand_dim], 5, 'linear', random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final], mode="FAST_COMPILE")

    # Test updates
    params = list(get_params().values())
    grads = tensor.grad(final.mean(), params)
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)

    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)

    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
Example #7
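A script-level classifier: a tanh hidden layer, a zero-initialized softmax output, and an L2 weight penalty added to the cross-entropy, with iterators splitting the data at index 60000 into train and validation. X, y, X_sym, y_sym, random_state, and n_targets appear to be defined earlier in the script.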
minibatch_size = 20
n_hid = 500

l1 = tanh([X_sym], [X.shape[1]], proj_dim=n_hid, name='l1',
          random_state=random_state)
y_pred = softmax_zeros([l1], [n_hid], proj_dim=n_targets, name='y_pred')
nll = categorical_crossentropy(y_pred, y_sym).mean()
# L2 penalty over all weights (skip_regex=None means none are excluded)
weights = get_weights(skip_regex=None).values()
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params = list(get_params().values())
grads = theano.grad(cost, params)

learning_rate = 0.01
opt = sgd(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())


train_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               stop_index=60000)
valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               start_index=60000)

Example #8
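A minimal variational autoencoder: a softplus encoder feeding mu and log_sigma, a Gaussian log-sampling layer, a softplus-plus-sigmoid decoder, and a cost that adds the Gaussian KL term to the binary cross-entropy reconstruction.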
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym = add_datasets_to_graph([X], ["X"], graph)

    l1_enc = softplus_layer([X_sym],
                            graph,
                            'l1_enc',
                            proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc],
                      graph,
                      'mu',
                      proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc],
                             graph,
                             'log_sigma',
                             proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma],
                                     graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp],
                            graph,
                            'l1_dec',
                            proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec],
                        graph,
                        'out',
                        proj_dim=X.shape[1],
                        random_state=random_state)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.  # zero learning rate: the updates leave parameters unchanged
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym], [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function,
                           cost_function,
                           checkpoint_dict, [X],
                           minibatch_size,
                           train_indices,
                           valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)