def logistic_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    """Fit logistic-regression weights with a fixed number of gradient steps.

    Parameters
    ----------
    T
        dtype used for the weight vector and for the optional intercept
        column.
    features
        2-D array; ``shape[0]`` rows are dotted with the weights and
        ``shape[1]`` fixes the number of weights.
    target
        Values the sigmoid predictions are subtracted from.
    steps
        Number of update iterations to run.
    learning_rate
        Factor applied to the gradient before it is added to the weights.
    sample
        The log likelihood is printed on every step where
        ``step % sample == 0``.
    add_intercept
        When true, a column of ones is prepended to ``features``.

    Returns
    -------
    The weight vector after ``steps`` updates (all zeros if ``steps`` is 0).
    """
    if add_intercept:
        ones_column = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((ones_column, features))

    n_weights = features.shape[1]
    weights = np.zeros(n_weights, dtype=T)

    for step in range(steps):
        predictions = sigmoid(np.dot(features, weights))
        residual = target - predictions
        # Move the weights along the gradient, updating them in place.
        weights += learning_rate * np.dot(residual, features)

        if step % sample != 0:
            continue
        # Progress report, evaluated with the freshly updated weights.
        likelihood = log_likelihood(features, target, weights)
        print(f"Log Likelihood of step {step!s}: {likelihood!s}")

    return weights
def find_centroids(centroids, data, labels, pairwise_distances, zero_point, C, D):
    """Overwrite ``centroids`` with the mean of the points assigned to each one.

    For every centroid index in ``range(C)`` the rows of ``data`` whose label
    equals that index are summed (rows with a different label are replaced by
    ``zero_point`` before summing) and the sum is divided by the number of
    matching labels. ``centroids`` is modified in place. ``D`` is accepted but
    not used by this implementation.

    Returns the accumulated sum of ``pairwise_distances[i, labels[i]]`` over
    all points ``i``.
    """
    # How many points carry each centroid's label.
    members_per_centroid = np.bincount(labels, minlength=C)

    total_distance = 0.0
    for k in range(C):
        # True for the points currently assigned to centroid k.
        belongs_to_k = labels == k

        # Swap non-members for zero_point so one sum over all rows yields
        # the un-normalised centroid position.
        contributions = np.where(belongs_to_k[..., np.newaxis], data, zero_point)
        centroids[k, :] = np.sum(contributions, axis=0)

        # Only the distances of member points count towards the total.
        member_distances = np.where(belongs_to_k, pairwise_distances[:, k], 0.0)
        total_distance += np.sum(member_distances)

    # Clamp empty clusters to a divisor of one: this avoids a divide by zero
    # and leaves the coordinates of an unused centroid well defined.
    divisors = np.maximum(members_per_centroid, np.ones((1,), dtype=np.uint64))
    centroids /= divisors[:, np.newaxis]
    return total_distance
def find_centroids(centroids, data, labels, pairwise_distances, zero_point, C):
    """Overwrite ``centroids`` with per-label means computed via ``bincount``.

    ``centroids`` is modified in place: column by column it receives the sum
    of ``data`` grouped by ``labels``, which is then divided by the number of
    points carrying each label. ``zero_point`` is accepted but not used by
    this implementation.

    Returns the sum of the row-wise minima of ``pairwise_distances``
    (presumably each point's distance to its closest centroid).
    """
    # numpy offers no combined amin/argmin, so the distance matrix is walked
    # here independently of whatever pass produced the labels.
    nearest = np.amin(pairwise_distances, axis=1)
    total_distance = np.sum(nearest)

    # How many points carry each centroid's label.
    members_per_centroid = np.bincount(labels, minlength=C)

    # A bincount weight must be a scalar per point, so the un-normalised
    # centroid coordinates are accumulated one dimension at a time.
    n_dims = data.shape[1]
    for dim in range(n_dims):
        centroids[:, dim] = np.bincount(
            labels, weights=data[:, dim], minlength=C
        )

    # Clamp empty clusters to a divisor of one: this avoids a divide by zero
    # and leaves the coordinates of an unused centroid well defined.
    divisors = np.maximum(members_per_centroid, np.ones((1,), dtype=np.uint64))
    centroids /= divisors[:, np.newaxis]
    return total_distance
def linear_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    """Fit linear-regression weights with a fixed number of gradient steps.

    Parameters
    ----------
    T
        dtype used for the weight vector and for the optional intercept
        column.
    features
        2-D array; ``shape[1]`` fixes the number of weights and
        ``len(features)`` scales the gradient.
    target
        Values subtracted from the predicted scores to form the error.
    steps
        Number of update iterations to run.
    learning_rate
        Factor applied to the gradient before it is added to the weights.
    sample
        The summed squared error is printed on every step where
        ``step % sample == 0``.
    add_intercept
        When true, a column of ones is prepended to ``features``.

    Returns
    -------
    The weight vector after ``steps`` updates (all zeros if ``steps`` is 0).
    """
    if add_intercept:
        ones_column = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((ones_column, features))

    n_weights = features.shape[1]
    weights = np.zeros(n_weights, dtype=T)

    for step in range(steps):
        residual = np.dot(features, weights) - target
        # Negated, length-averaged gradient: adding it reduces the error.
        descent = -(1.0 / len(features)) * residual.dot(features)
        weights += learning_rate * descent

        if step % sample != 0:
            continue
        # The reported error is the one measured before this step's update.
        squared_error = np.sum(np.power(residual, 2))
        print(f"Error of step {step!s}: {squared_error!s}")

    return weights
def test():
    """Print the sum of an N x N matrix of ones and an N x N counting matrix."""
    N = 100
    dims = (N, N)
    all_ones = np.ones(dims)
    # 0 .. N*N - 1 laid out as an N x N matrix.
    counting = np.arange(0, N * N).reshape(dims)
    print(all_ones + counting)
def test():
    """Check ``lg`` array-creation routines against their ``np`` counterparts.

    Value-producing routines (``array``, ``zeros``, ``ones``, ``full``, the
    ``*_like`` variants and ``arange``) must match in both contents and dtype.
    ``empty`` / ``empty_like`` are compared on shape and dtype only.
    """

    def same_values(actual, expected):
        # Contents and dtype must both agree.
        assert np.array_equal(actual, expected)
        assert actual.dtype == expected.dtype

    def same_layout(actual, expected):
        # Only shape and dtype are compared; the contents are not inspected.
        assert lg.shape(actual) == np.shape(expected)
        assert actual.dtype == expected.dtype

    # Construction from a Python list versus from an existing np array.
    lg_seed = lg.array([1, 2, 3])
    np_seed = np.array([1, 2, 3])
    same_values(lg_seed, lg.array(np_seed))

    # Shape-based constructors.
    same_layout(lg.empty((2, 3)), np.empty((2, 3)))
    same_values(lg.zeros((2, 3)), np.zeros((2, 3)))
    same_values(lg.ones((2, 3)), np.ones((2, 3)))
    same_values(lg.full((2, 3), 3), np.full((2, 3), 3))

    # Prototype-based ("*_like") constructors.
    same_layout(lg.empty_like(lg_seed), np.empty_like(np_seed))
    same_values(lg.zeros_like(lg_seed), np.zeros_like(np_seed))
    same_values(lg.ones_like(lg_seed), np.ones_like(np_seed))
    same_values(lg.full_like(lg_seed, 3), np.full_like(np_seed, 3))

    # arange: stop only, explicit dtype, float bounds, and a custom step.
    same_values(lg.arange(10), np.arange(10))
    same_values(lg.arange(10, dtype=np.int32), np.arange(10, dtype=np.int32))
    same_values(lg.arange(2.0, 10.0), np.arange(2.0, 10.0))
    same_values(lg.arange(2, 30, 3), np.arange(2, 30, 3))

    # Disabled check for a string-typed full_like, kept for reference:
    # xfls = lg.full_like(x, '3', dtype=np.str_)
    # yfls = np.full_like(y, '3', dtype=np.str_)
    # assert(lg.array_equal(xfls, yfls))
    # assert(xfls.dtype == yfls.dtype)