def test_logistic_regression_2():
    x = ad.Variable(name="input")   # n*1
    w = ad.Variable(name="weight")  # n*1
    b = ad.Variable(name="bias")    # 1
    logits = ad.matmul_op(x, w) + b
    y = ad.softmax_with_cross_entropy_op(logits)

    x_val = np.array([[2, 2, 2]])
    w_val = np.array([[1, 1, 1], [2, 2, 2]]).transpose()
    b_val = 5 * np.ones(2)
    # print(x_val, w_val, b_val, np.matmul(x_val, w_val) + b_val)

    grad_x, grad_w, grad_b = ad.gradients(y, [x, w, b])

    executor = ad.Executor([y, grad_x, grad_w, grad_b])
    y_val, grad_x_val, grad_w_val, grad_b_val = \
        executor.run(feed_dict={x: x_val, w: w_val, b: b_val})

    # expected softmax output, computed with the max-subtraction trick
    logits_val = np.matmul(x_val, w_val) + b_val
    x_row_max = logits_val.max(axis=-1)
    x_row_max = x_row_max.reshape(list(logits_val.shape)[:-1] + [1])
    e_x = np.exp(logits_val - x_row_max)
    expected_yval = e_x / e_x.sum(axis=-1).reshape(list(logits_val.shape)[:-1] + [1])

    y_base = np.ones_like(expected_yval)
    expected_grad_x_val = np.matmul(y_base * (expected_yval - 1), w_val.transpose())
    expected_grad_w_val = np.matmul(x_val.transpose(), y_base * (expected_yval - 1))
    expected_grad_b_val = y_base * (expected_yval - 1)
    # print(grad_b_val)
    # print(expected_grad_b_val)

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x_val, expected_grad_x_val)
    assert np.array_equal(grad_w_val, expected_grad_w_val)
    assert np.array_equal(grad_b_val, expected_grad_b_val)
def test_matmul_two_vars():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = ad.matmul_op(x2, x3)

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = np.array([[1, 2], [3, 4], [5, 6]])   # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3

    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={x2: x2_val, x3: x3_val})

    expected_yval = np.matmul(x2_val, x3_val)
    expected_grad_x2_val = np.matmul(np.ones_like(expected_yval), np.transpose(x3_val))
    expected_grad_x3_val = np.matmul(np.transpose(x2_val), np.ones_like(expected_yval))

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
def test_matmul_mix_add_1():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    x4 = ad.Variable(name="x4")
    y = ad.matmul_op(x2, x3)
    z = 2 * (y + x4)

    grad_x2, grad_x3, grad_x4 = ad.gradients(z, [x2, x3, x4])

    executor = ad.Executor([z, grad_x2, grad_x3, grad_x4])
    x2_val = np.array([[1, 2], [3, 4], [5, 6]])   # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3
    x4_val = np.arange(9).reshape((3, 3))         # 3x3

    z_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(
        feed_dict={x2: x2_val, x3: x3_val, x4: x4_val})

    expected_zval = 2 * (np.matmul(x2_val, x3_val) + x4_val)
    expected_grad_x2_val = 2 * np.matmul(np.ones_like(expected_zval), np.transpose(x3_val))
    expected_grad_x3_val = 2 * np.matmul(np.transpose(x2_val), np.ones_like(expected_zval))
    expected_grad_x4_val = 2 * np.ones((3, 3))

    assert isinstance(z, ad.Node)
    assert np.array_equal(z_val, expected_zval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
    assert np.array_equal(grad_x4_val, expected_grad_x4_val)
def main():
    # Generate the dataset and initial weight
    x_t, y_t = generate_dataset(1, 1, -5, point_num=100)
    # Add an extra dimension to build homogeneous coordinates
    x_t = np.concatenate((x_t, np.ones((x_t.shape[0], 1))), axis=1)
    W_val = np.random.rand(3, 1)
    # Draw the initial decision hyperplane
    draw(W_val, x_t, y_t)

    # Create the model
    x = ad.Variable(name='x')
    W = ad.Variable(name='W')
    y = ad.sigmoid_op(ad.matmul_op(x, W))

    # Define the loss
    y_ = ad.Variable(name='y_')
    cross_entropy = ad.reduce_mean_op(-ad.reduce_sum_op(
        y_ * ad.log_op(y) + (1 - y_) * ad.log_op(1 - y),
        reduction_indices=[1]))

    # Update rule
    learning_rate = 0.5
    W_grad, = ad.gradients(cross_entropy, [W])
    W_train_step = W - learning_rate * W_grad

    # Training
    executor = ad.Executor([cross_entropy, y, W_train_step])
    steps = 200

    plt.ion()
    for i in range(steps):
        plt.cla()

        loss_val, y_val, W_val = executor.run(feed_dict={
            x: x_t,
            y_: y_t,
            W: W_val,
        })

        print("Step {}: loss: {}".format(i + 1, loss_val))

        # Draw the trained decision hyperplane
        draw(W_val, x_t, y_t)

        plt.pause(0.1)

    plt.ioff()
    plt.show()
def fit(self, X, Y):
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    p = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))

    # cross entropy
    loss = 0 - y * ad.log_op(p) - (1 - y) * ad.log_op(1 - p)

    grad_w, = ad.gradients(loss, [w])

    # SGD
    length = np.shape(X)[0]
    self.num_feature = np.shape(X)[1]
    executor = ad.Executor([loss, grad_w])
    self.coef_ = np.random.rand(1, self.num_feature) / 1000.0

    for i in range(self.maxiter):
        grad = np.zeros((1, self.num_feature))
        loss = 0

        for j in range(self.batch):
            t = random.choice(range(length))
            x_val = X[t].reshape((self.num_feature, 1))
            if Y[t] == self.labels[0]:
                y_val = 0
            else:
                y_val = 1
            loss_val, grad_w_val = executor.run(
                feed_dict={x: x_val, w: self.coef_, y: y_val})
            grad = grad + grad_w_val
            loss = loss + loss_val

        self.coef_ = self.coef_ - self.learning_rate * grad / self.batch
        if i % 100 == 0:
            print(loss)
def linear_regression():
    x = ad.Variable(name='x')
    y = ad.Variable(name='y')
    W = ad.Variable(name='W')
    output = ad.matmul_op(x, W)

    # loss function
    cost = 0.5 * ad.reduce_sum_op((y - output) * (y - output), axis=0)
    # cost = 0.5 * ad.matmul_op((y - output), (y - output), True, False)

    # gradient
    grad_cost_w, = ad.gradients(cost, [W])

    # construct the dataset: y = x
    num_point = 10
    x_data = np.array(range(num_point)).reshape((num_point, 1))
    y_data = x_data + np.random.uniform(-0.1, 0.1, (num_point, 1))
    x_data = np.concatenate([x_data, np.ones((num_point, 1))], axis=1)

    # initialize the parameters
    w_val = np.array([[0.5], [0.1]])
    executor = ad.Executor([cost, grad_cost_w])

    # train
    n_epoch = 1000
    lr = 0.001
    cost_list = []
    print("training...")
    for i in range(n_epoch):
        # evaluate the graph
        cost_val, grad_cost_w_val = executor.run(
            feed_dict={x: x_data, W: w_val, y: y_data})
        # update the parameters using GD
        print("cost: ", cost_val)
        print("grad: ", grad_cost_w_val)
        w_val = w_val - lr * grad_cost_w_val
        print("weight: ", w_val)
        cost_list.append(cost_val)
def get_model_params(x_train, y_train, class_1, class_2):
    '''Returns the weights after performing gradient descent.'''
    learning_rate = 0.01
    batch_size = 8

    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    logistic_regression = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))
    cross_entropy = -1 * y * ad.log_op(logistic_regression) - (1 - y) * ad.log_op(1 - logistic_regression)
    gradients = ad.gradients(cross_entropy, [w])[0]
    executor = ad.Executor([cross_entropy, gradients])

    weights = np.random.rand(1, np.shape(x_train)[1]) / 1000.0
    # batch = 0
    # previous_loss = 0

    for i in range(5000):
        grad = np.zeros((1, np.shape(x_train)[1]))
        loss = 0

        # go randomly over examples in each batch
        for _ in range(batch_size):
            t = random.choice(range(np.shape(x_train)[0]))
            x_flat = x_train[t].reshape((np.shape(x_train)[1], 1))
            y_label = 0 if y_train[t] == class_1 else 1

            loss_delta, grad_delta = executor.run(
                feed_dict={x: x_flat, w: weights, y: y_label})
            grad += grad_delta
            loss += loss_delta

        weights = weights - (learning_rate * grad / batch_size)

        if i % 1000 == 0:
            print("loss = {:.3f} loss_delta = {:.3f}".format(loss[0][0], loss_delta[0][0]))

    return weights
def test_msr():
    x = ad.Variable(name="x")
    y = ad.Variable(name="y")
    z = x * y

    l = ad.reduce_sum_op((x - z) * (x - z), axis=0)
    # c = 2*x
    c = ad.matmul_op(x - z, x - z, True, False)

    x_val = np.ones((10, 1))
    y_val = np.ones((10, 1)) * 2

    grad_x1, grad_y1 = ad.gradients(l, [x, y])
    grad_x2, grad_y2 = ad.gradients(c, [x, y])

    executor = ad.Executor([l, c, grad_x1, grad_y1, grad_x2, grad_y2])
    # executor = ad.Executor([l, grad_x1, grad_y1, d])
    loss, cost, grad_x1_val, grad_y1_val, grad_x2_val, grad_y2_val = executor.run(
        feed_dict={x: x_val, y: y_val})
    # loss, grad_x1_val, grad_y1_val, d_val = executor.run(feed_dict={x: x_val, y: y_val, z: z_val})

    print(loss)
    print(cost)
    print("gx1: %s, gy1: %s" % (str(grad_x1_val), str(grad_y1_val)))
    print("gx2: %s, gy2: %s" % (str(grad_x2_val), str(grad_y2_val)))
w = ad.Variable(name="w") b = ad.Variable(name="b") labels = ad.Variable(name="lables") # Computation graph def cross_entropy(output, labels): loss = -1.0 * ad.reduce_sum_op(labels * ad.log_op(output) + (1.0 - labels) * ad.log_op(1.0 - output), axis=1) return loss # Output of the hypothesis of logistic regression p = 1.0 / (1.0 + ad.exp_op((-1.0 * ad.matmul_op(w, x)))) # Loss node loss = cross_entropy(p, labels) # Gradient node of loss corresponding to w grad_y_w, = ad.gradients(loss, [w]) num_features = 2 num_points = 200 num_iterations = 1000 learning_rate = 0.01 # The dummy dataset consists of two classes. # The classes are modelled as a random normal variables with different means. class_1 = np.random.normal(2, 0.1, (int(num_points / 2), num_features)) class_2 = np.random.normal(4, 0.1, (int(num_points / 2), num_features))
# const
data_size = 50
batch_size = 8
INPUT_NODE = 2
LAYER1_NODE = 10
OUTPUT_NODE = 1

# inputs
x = ad.Variable(name='x')
y_ = ad.Variable(name='y_')

# parameters
w1 = ad.Variable(name='w1')
b1 = ad.Variable(name='b1')

# forward
y = ad.matmul_op(x, w1) + b1

# loss
MSE = ad.pow_op((y - y_), 2)

# backward
grad_w1, grad_b1 = ad.gradients(MSE, [w1, b1])
executor = ad.Executor([MSE, grad_w1, grad_b1])

# fake data
rmd = np.random.RandomState(1)
X = np.linspace(0, 5, data_size)
X = np.array([[i] for i in X])
Y = X * 2

plt.scatter(X, Y)
# plt.show()
data_size = 100
batch_size = 8
INPUT_NODE = 2
LAYER1_NODE = 10
OUTPUT_NODE = 1

# inputs
x = ad.Variable(name='x')
y_ = ad.Variable(name='y_')

# parameters
w1 = ad.Variable(name='w1')
w2 = ad.Variable(name='w2')
b1 = ad.Variable(name='b1')
b2 = ad.Variable(name='b2')

# forward
a = ad.matmul_op(x, w1) + b1
a = ad.relu_op(a)
y = ad.matmul_op(a, w2) + b2
y = ad.relu_op(y)

# loss
cross_entropy = ad.pow_op((y - y_), 2)

# backward
grad_w1, grad_w2, grad_b1, grad_b2 = \
    ad.gradients(cross_entropy, [w1, w2, b1, b2])
executor = ad.Executor([cross_entropy, grad_w1, grad_w2, grad_b1, grad_b2])

# fake data
rmd = np.random.RandomState(1)
X = rmd.rand(data_size, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

for i in range(data_size):
    ix = range(N * j, N * (j + 1))
    r = np.linspace(0.0, 1, N)
    t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2  # theta
    X_data[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
    y_data[ix] = j

y_one_hot = np.zeros((N * K, K))
y_one_hot[range(N * K), y_data] = 1

######################################

x = ad.Variable(name="x")
y = ad.Variable(name='y')
w = ad.Variable(name="w")
b = ad.Variable(name="b")

z = ad.matmul_op(x, w)
softmax = z + ad.broadcastto_op(b, z)
loss = ad.softmax_crossentropy_op(softmax, y)
grad_w, grad_b = ad.gradients(loss, [w, b])
executor = ad.Executor([loss, grad_w, grad_b])

w_val = np.zeros((D, K))
b_val = np.zeros(K)

n_epoch = 1000
lr = 0.01
for i in range(n_epoch):
    loss_val, grad_w_val, grad_b_val = executor.run(
        feed_dict={x: X_data, w: w_val, y: y_one_hot, b: b_val})
    if i % 10 == 0:
import numpy as np
import autodiff as ad

x = ad.Variable(name='x')
y = ad.Variable(name='y')
W = ad.Variable(name='W')
b = ad.Variable(name='b')

z = ad.matmul_op(x, W)
output = z + ad.broadcastto_op(b, z)

num_point = 1000
cost = ad.reduce_sum_op((y - output) * (y - output)) / (2.0 * num_point)
grad_cost_w, grad_b = ad.gradients(cost, [W, b])

x_data = np.linspace(0, 10, num_point).reshape((num_point, 1))
y_data = 2.0 * x_data + np.random.uniform(-0.2, 0.2, (num_point, 1)) + 5.0 * np.ones((num_point, 1))

w_val = np.zeros((1, 1))
b_val = np.zeros(1)

executor = ad.Executor([cost, grad_cost_w, grad_b])

# train
n_epoch = 2000
lr = 0.01
print("training...")
for i in range(n_epoch):
    # evaluate the graph
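# The excerpt above is cut off inside the training loop. A plausible loop body,
# assuming the same plain gradient-descent update as the linear_regression()
# example earlier in this collection; this continuation is a sketch, not the
# original author's code.
for i in range(n_epoch):
    # evaluate the cost and the gradients of W and b in one pass
    cost_val, grad_cost_w_val, grad_b_val = executor.run(
        feed_dict={x: x_data, W: w_val, y: y_data, b: b_val})
    # gradient-descent update of the weight and bias values
    w_val = w_val - lr * grad_cost_w_val
    b_val = b_val - lr * grad_b_val
    if i % 100 == 0:
        print("epoch {}: cost {}".format(i, cost_val))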
def get_logistic_model(x, weight, bias):
    y = 1 / (1 + ad.exp_op(-1 * (ad.matmul_op(x, weight, trans_B=True) + bias)))
    # y = 1 / (1 + ad.exp_op(-1 * (ad.mul_op(x, weight) + bias)))
    return y
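# A minimal usage sketch for get_logistic_model, assuming the ad.Variable /
# ad.Executor API used by the other examples here; the variable names, shapes,
# and values below are illustrative assumptions, not taken from the original code.
x = ad.Variable(name='x')
weight = ad.Variable(name='weight')
bias = ad.Variable(name='bias')
y = get_logistic_model(x, weight, bias)

executor = ad.Executor([y])
x_val = np.random.rand(4, 3)   # 4 samples with 3 features each
w_val = np.random.rand(1, 3)   # transposed inside matmul_op via trans_B=True
b_val = np.zeros(1)
y_val, = executor.run(feed_dict={x: x_val, weight: w_val, bias: b_val})
print(y_val.shape)             # expected shape (4, 1), sigmoid outputs in (0, 1)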
LR = 0.0001
EPOCH = 500

x_val = np.linspace(-5, 5, 50)
y_val = x_val * x_val + np.random.rand(50) * 0.5

x = ad.Variable(name='x')
y_ = ad.Variable(name='y_')
w1 = ad.Variable(name='w1', init_val=np.random.rand(1, 10))
b1 = ad.Variable(name='b1', init_val=np.random.rand(1, 10))
w2 = ad.Variable(name='w2', init_val=np.random.rand(10, 10))
b2 = ad.Variable(name='b2', init_val=np.random.rand(1, 10))
w3 = ad.Variable(name='w3', init_val=np.random.rand(10, 1))
b3 = ad.Variable(name='b3', init_val=np.random.rand(1, 1))

fc1 = ad.relu(ad.matmul_op(x, w1) + b1)
fc2 = ad.relu(ad.matmul_op(fc1, w2) + b2)
y = ad.matmul_op(fc2, w3) + b3

loss = (y_ - y) * (y_ - y)

w1_grad, b1_grad, w2_grad, b2_grad, w3_grad, b3_grad = ad.gradients(
    loss, [w1, b1, w2, b2, w3, b3])
executor = ad.Executor(
    [loss, w1_grad, b1_grad, w2_grad, b2_grad, w3_grad, b3_grad])

for epoch in range(EPOCH):
    for i in range(x_val.shape[0]):
        loss_val, w1_grad_val, b1_grad_val, w2_grad_val, b2_grad_val, w3_grad_val, b3_grad_val = executor.run(
            feed_dict={