def run_gemm(N, I, ft):  # noqa: E741
    print("Problem Size: M=" + str(N) + " N=" + str(N) + " K=" + str(N))
    print("Total Iterations: " + str(I))
    flops = total_flops(N, N, N)
    print("Total Flops: " + str(flops / 1e9) + " GFLOPS/iter")
    space = total_space(N, N, N, ft)
    print("Total Size: " + str(space / 1e6) + " MB")
    A, B, C = initialize(N, N, N, ft)
    # Compute some sums and check for NaNs to force synchronization
    # before we start the timing
    assert not math.isnan(np.sum(A))
    assert not math.isnan(np.sum(B))
    assert not math.isnan(np.sum(C))
    start = datetime.datetime.now()
    # Run for as many iterations as was requested
    for idx in range(I):
        np.dot(A, B, out=C)
        # We need to rotate the matrices to keep Legate honest about moving
        # data, so it can't just duplicate A and B on the first iteration
        # and reuse them; this means that A, B, and C all need to be square
        A, B, C = B, C, A
    # Do another sum to synchronize for timings; B is the last output
    assert not math.isnan(np.sum(B))
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time: " + str(total) + " ms")
    average = total / I
    print("Average GEMM: " + str(average) + " ms")
    print("FLOPS/s: " + str(flops / (average * 1e6)) + " GFLOPS/s")
    return total
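
# run_gemm relies on three helpers defined elsewhere in the benchmark. The
# sketch below shows plausible implementations: total_flops uses the standard
# multiply-add count for a GEMM, and total_space counts the bytes of the three
# operand matrices. The bodies are assumptions that match the calls above,
# not the benchmark's actual code.
def total_flops(M, N, K):
    # One multiply plus one add per inner-product term
    # (2K - 1 flops per output element)
    return M * N * (2 * K - 1)


def total_space(M, N, K, ft):
    # Bytes for A (M x K), B (K x N), and C (M x N) at the given dtype
    return (M * K + K * N + M * N) * np.dtype(ft).itemsize


def initialize(M, N, K, ft):
    # Random operands and a zeroed output buffer
    A = np.random.rand(M, K).astype(ft)
    B = np.random.rand(K, N).astype(ft)
    C = np.zeros((M, N), dtype=ft)
    return A, B, C
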
def test():
    pythonX = np.reshape(np.linspace(0, 10001, 10000, dtype=int), (100, 100))
    x = lg.array(pythonX)
    pythonY = np.sum(pythonX, axis=0)
    y = lg.sum(x, axis=0)
    assert np.array_equal(pythonY, y)
    pythonY = np.sum(pythonX, axis=1)
    y = lg.sum(x, axis=1)
    assert np.array_equal(pythonY, y)
    return
def run_kmeans(C, D, T, I, N, S, benchmarking):  # noqa: E741
    print("Running kmeans...")
    print("Number of data points: " + str(N))
    print("Number of dimensions: " + str(D))
    print("Number of centroids: " + str(C))
    print("Max iterations: " + str(I))
    start = datetime.datetime.now()
    data, centroids = initialize(N, D, C, T)
    data_dots = np.square(np.linalg.norm(data, ord=2, axis=1))
    data_index = np.linspace(0, N - 1, N, dtype=np.int64)
    labels = None
    iteration = 0
    prior_distance_sum = None
    # We run for max iterations or until we converge
    # We only test convergence every S iterations
    while iteration < I:
        pairwise_distances = calculate_distances(data, centroids, data_dots)
        new_labels, distances = relabel(pairwise_distances, data_index)
        distance_sum = np.sum(distances)
        centroids = find_centroids(data, new_labels, C, D)
        if iteration > 0 and iteration % S == 0:
            changes = np.not_equal(labels, new_labels)
            total_changes = np.sum(changes)
            delta = distance_sum / prior_distance_sum
            print(
                "Iteration "
                + str(iteration)
                + " produced "
                + str(total_changes)
                + " changes, and total distance is "
                + str(distance_sum)
            )
            # We ignore the result of the threshold test in the case that we
            # are running performance benchmarks to measure performance for a
            # certain number of iterations
            if delta > 1 - 0.000001 and not benchmarking:
                print("Threshold triggered, terminating iterations early")
                break
        prior_distance_sum = distance_sum
        labels = new_labels
        iteration += 1
    # This final distance sum also synchronizes the results
    print(
        "Final distance sum at iteration "
        + str(iteration)
        + ": "
        + str(prior_distance_sum)
    )
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time: " + str(total) + " ms")
    return total
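
# run_kmeans assumes calculate_distances and relabel helpers. A plausible
# sketch of both follows, using the standard expansion
# ||x - c||^2 = ||x||^2 - 2 x.c + ||c||^2, where data_dots carries the
# precomputed ||x||^2 term; treat these bodies as illustrative assumptions.
def calculate_distances(data, centroids, data_dots):
    # Squared Euclidean distance from every point to every centroid
    centroid_dots = np.square(np.linalg.norm(centroids, ord=2, axis=1))
    pairwise_distances = (
        data_dots[:, np.newaxis] + centroid_dots[np.newaxis, :]
    )
    pairwise_distances -= 2.0 * np.dot(data, centroids.T)
    return pairwise_distances


def relabel(pairwise_distances, data_index):
    # Assign each point to its nearest centroid and report that distance
    new_labels = np.argmin(pairwise_distances, axis=1)
    distances = pairwise_distances[data_index, new_labels]
    return new_labels, distances
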
def run_black_scholes(N, D):
    print("Running black scholes on %dK options..." % N)
    N *= 1000
    start = datetime.datetime.now()
    S, X, T, R, V = initialize(N, D)
    call, put = black_scholes(S, X, T, R, V)
    # Check the result for NaNs to synchronize before stopping timing
    call_sum = np.sum(call)
    put_sum = np.sum(put)
    assert not math.isnan(call_sum) and not math.isnan(put_sum)
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time: " + str(total) + " ms")
    return total
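
# black_scholes and initialize are assumed helpers for this driver. The
# sketch below prices European options with the closed-form Black-Scholes
# formula, using the classic Abramowitz & Stegun polynomial approximation of
# the normal CDF so it stays in plain NumPy; the initialize ranges are
# illustrative assumptions.
def cnd(d):
    # Polynomial approximation to the cumulative normal distribution
    A1 = 0.31938153
    A2 = -0.356563782
    A3 = 1.781477937
    A4 = -1.821255978
    A5 = 1.330274429
    RSQRT2PI = 0.3989422804014327
    K = 1.0 / (1.0 + 0.2316419 * np.absolute(d))
    val = (
        RSQRT2PI
        * np.exp(-0.5 * d * d)
        * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))
    )
    return np.where(d > 0, 1.0 - val, val)


def black_scholes(S, X, T, R, V):
    # S: spot, X: strike, T: years to expiry, R: rate, V: volatility
    sqrt_t = np.sqrt(T)
    d1 = (np.log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrt_t)
    d2 = d1 - V * sqrt_t
    call = S * cnd(d1) - X * np.exp(-R * T) * cnd(d2)
    put = X * np.exp(-R * T) * cnd(-d2) - S * cnd(-d1)
    return call, put


def initialize(N, D):
    S = np.random.uniform(10.0, 50.0, N).astype(D)
    X = np.random.uniform(10.0, 50.0, N).astype(D)
    T = np.random.uniform(1.0, 10.0, N).astype(D)
    R = np.full(N, 0.1, dtype=D)
    V = np.full(N, 0.2, dtype=D)
    return S, X, T, R, V
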
def test():
    numpyX = np.array([1 + 4j, 2 + 5j, 3 + 6j], np.complex64)
    x = lg.array(numpyX)
    z = lg.sum(x)
    assert lg.all(lg.abs(z - np.sum(numpyX)) < 1e-5)
    z = lg.prod(x)
    assert lg.all(lg.abs(z - np.prod(numpyX)) < 1e-5)
    return
def test():
    np.random.seed(42)
    b = np.random.random((10, 12, 13))
    a = lg.array(b)
    assert np.allclose(a, b)
    lg_sum = lg.sum(a)
    np_sum = np.sum(b)
    assert np.allclose(np_sum, lg_sum)
    return
def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing):
    start = datetime.datetime.now()
    X = np.random.randn(sentence_length, batch_size, hidden_size)
    h0 = np.random.randn(1, hidden_size)
    WLSTM = np.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / np.sqrt(word_size + hidden_size)
    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size
    Hin = np.zeros((n, b, xphpb))
    Hout = np.zeros((n, b, d))
    IFOG = np.zeros((n, b, d * 4))
    IFOGf = np.zeros((n, b, d * 4))
    C = np.zeros((n, b, d))
    Ct = np.zeros((n, b, d))
    for t in range(0, n):
        if t == 0:
            prev = np.tile(h0, (b, 1))
        else:
            prev = Hout[t - 1]
        Hin[t, :, :word_size] = X[t]
        Hin[t, :, word_size:] = prev
        # compute all gate activations. dots:
        IFOG[t] = Hin[t].dot(WLSTM)
        # non-linearities
        IFOGf[t, :, : 3 * d] = 1.0 / (
            1.0 + np.exp(-IFOG[t, :, : 3 * d])
        )  # sigmoids; these are the gates
        IFOGf[t, :, 3 * d :] = np.tanh(IFOG[t, :, 3 * d :])  # tanh
        # compute the cell activation
        C[t] = IFOGf[t, :, :d] * IFOGf[t, :, 3 * d :]
        if t > 0:
            C[t] += IFOGf[t, :, d : 2 * d] * C[t - 1]
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d : 3 * d] * Ct[t]
    # Do a little sum of the outputs to synchronize and check for NaNs
    total = np.sum(Hout)
    assert not math.isnan(total)
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(total) + " ms")
    return total
def run_wgrad(H=256, W=256, B=32, C=256, K=32, R=5, S=5, timing=False):
    if timing:
        start = datetime.datetime.now()
    x, y = initialize(C, K, B, H, W)
    dw = cross_correlate(x, y, C, K, R, S, B, H, W)
    # Do a little sum over dw to sync the results
    total = np.sum(dw)
    assert not math.isnan(total)
    if timing:
        stop = datetime.datetime.now()
        delta = stop - start
        print("Elapsed Time: " + str(delta.total_seconds() * 1000.0) + " ms")
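
# The initialize helper for run_wgrad is defined elsewhere. Given the
# indexing in cross_correlate below (x[c, :, :, :] against
# y_shift[k, :, :, :]), a reasonable sketch is channel-major activations and
# output gradients; the layout is an assumption.
def initialize(C, K, B, H, W):
    x = np.random.randn(C, B, H, W)  # input activations
    y = np.random.randn(K, B, H, W)  # output gradients
    return x, y
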
def find_centroids(
    centroids, data, labels, pairwise_distances, zero_point, C, D
):
    # Get the number of points associated with each centroid
    counts = np.bincount(labels, minlength=C)
    # Build label masks for each centroid and sum across all the
    # points associated with each new centroid
    distance_sum = 0.0
    for idx in range(C):
        # Boolean mask indicating where the points are for this center
        centroid_mask = labels == idx
        centroids[idx, :] = np.sum(
            np.where(centroid_mask[..., np.newaxis], data, zero_point), axis=0
        )
        distance_sum += np.sum(
            np.where(centroid_mask, pairwise_distances[:, idx], 0.0)
        )
    # To avoid introducing divide-by-zero errors, if a centroid has no
    # weight we do no normalization; this keeps its coordinates defined.
    counts = np.maximum(counts, np.ones((1,), dtype=np.uint64))
    centroids /= counts[:, np.newaxis]
    return distance_sum
def cross_correlate(x, y, C, K, R, S, B, H, W):
    dw = np.zeros(shape=(R, S, C, K))
    # cross-correlate images to compute weight gradients
    y_pad = np.zeros(shape=(K, B, H + R - 1, W + S - 1))
    # integer division keeps the slice bounds integral
    y_pad[:, :, R // 2 : -(R // 2), S // 2 : -(S // 2)] = y
    for r in range(R):
        for s in range(S):
            y_shift = y_pad[:, :, r : r + H, s : s + W]
            for c in range(C):
                for k in range(K):
                    dw[r, s, c, k] = np.sum(
                        x[c, :, :, :] * y_shift[k, :, :, :]
                    )
    return dw
def run(grid, I, N):  # noqa: E741
    print("Running Jacobi stencil...")
    center = grid[1:-1, 1:-1]
    north = grid[0:-2, 1:-1]
    east = grid[1:-1, 2:]
    west = grid[1:-1, 0:-2]
    south = grid[2:, 1:-1]
    for i in range(I):
        average = center + north + east + west + south
        work = 0.2 * average
        # delta = np.sum(np.absolute(work - center))
        center[:] = work
    total = np.sum(center)
    return total / (N ** 2)
def run_linear_regression(N, F, T, I, S, B):  # noqa: E741
    print("Running linear regression...")
    print("Number of data points: " + str(N) + "K")
    print("Number of features: " + str(F))
    print("Number of iterations: " + str(I))
    start = datetime.datetime.now()
    features, target = initialize(N * 1000, F, T)
    weights = linear_regression(T, features, target, I, 1e-5, S, B)
    # Check the weights for NaNs to synchronize before stopping timing
    assert not math.isnan(np.sum(weights))
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time: " + str(total) + " ms")
    return total
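
# The initialize helper here is assumed to synthesize a random regression
# problem. One plausible sketch, with noise-free targets generated from a
# hidden weight vector (an assumption about the data generator):
def initialize(N, F, T):
    features = np.random.randn(N, F).astype(T)
    true_weights = np.random.randn(F).astype(T)
    target = np.dot(features, true_weights)
    return features, target
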
def test():
    x = [1.0, 2, 3]
    y = [4, 5, 6]
    z = x + y  # list concatenation: [1.0, 2, 3, 4, 5, 6]
    numpyResult = np.sum(z)
    # print(numpyResult)
    gx = lg.array(x)
    gy = lg.array(y)
    z = gx + gy  # elementwise addition: [5, 7, 9]; same total
    legateResult = lg.sum(z)
    # print(legateResult)
    assert legateResult == numpyResult
    return
def run_jacobi(N, iters, perform_check, timing, verbose):
    start = datetime.datetime.now()
    A, b = generate_random(N)
    x = solve(A, b, iters, verbose)
    if perform_check:
        check(A, x, b)
    else:
        # Need a synchronization here for timing
        assert not math.isnan(np.sum(x))
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(total) + " ms")
    return total
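
# generate_random, solve, and check are assumed helpers for run_jacobi. The
# sketch below builds a diagonally dominant system (so Jacobi iteration
# converges) and applies the textbook update
# x_{k+1} = (b - (A - diag(A)) @ x_k) / diag(A); the details are assumptions
# about the benchmark's helpers.
def generate_random(N):
    # Diagonal dominance guarantees Jacobi convergence
    A = np.random.rand(N, N) + N * np.eye(N)
    b = np.random.rand(N)
    return A, b


def solve(A, b, iters, verbose):
    d = np.diag(A)
    R = A - np.diag(d)
    x = np.zeros_like(b)
    for i in range(iters):
        x = (b - np.dot(R, x)) / d
        if verbose:
            print("Completed iteration " + str(i))
    return x


def check(A, x, b):
    print("Residual norm: " + str(np.linalg.norm(np.dot(A, x) - b)))
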
def find_centroids(data, labels, C, D):
    # Sort the points by their labels
    indices = np.argsort(labels)
    sorted_points = data[indices]
    # Compute counts and the ending index of the set of points for each
    # centroid
    counts = np.bincount(labels, minlength=C)
    indexes = np.cumsum(counts)
    # Now we can use the indexes to split the array into sub-arrays and then
    # sum across them to create the centroids
    centroids = np.empty((C, D), dtype=data.dtype)
    ragged_arrays = np.split(sorted_points, indexes)
    for idx in range(C):
        centroids[idx, :] = np.sum(ragged_arrays[idx], axis=0)
    # To avoid introducing divide-by-zero errors, if a centroid has no
    # weight we do no normalization; this keeps its coordinates defined.
    counts = np.maximum(counts, 1)
    return centroids / counts[:, np.newaxis]
def forward(x, h_prev, C_prev, H_size, X_size, p):
    assert x.shape == (X_size, 1)
    assert h_prev.shape == (H_size, 1)
    assert C_prev.shape == (H_size, 1)
    z = np.row_stack((h_prev, x))
    f = sigmoid(np.dot(p.W_f.v, z) + p.b_f.v)
    i = sigmoid(np.dot(p.W_i.v, z) + p.b_i.v)
    C_bar = tanh(np.dot(p.W_C.v, z) + p.b_C.v)
    C = f * C_prev + i * C_bar
    o = sigmoid(np.dot(p.W_o.v, z) + p.b_o.v)
    h = o * tanh(C)
    v = np.dot(p.W_v.v, h) + p.b_v.v
    y = np.exp(v) / np.sum(np.exp(v))  # softmax
    return z, f, i, C_bar, C, o, h, v, y
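
# The forward pass above assumes elementwise sigmoid and tanh helpers, plus a
# parameter container p whose weight members expose their arrays as .v.
# Minimal sketches of the activation functions follow (the container layout
# is an assumption):
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def tanh(x):
    return np.tanh(x)
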
def test():
    anp = np.array([[1, 2, 3], [4, 5, 6]])
    a = lg.array(anp)
    r = a.sum(0)
    assert np.array_equal(r, [5, 7, 9])
    r = a.sum(1)
    assert np.array_equal(r, [6, 15])
    bnp = np.random.random((2, 3))
    b = lg.array(bnp)
    assert np.allclose(lg.sum(b), np.sum(bnp))
    af = np.random.randn(4, 5)
    bf = lg.array(af)
    assert np.allclose(af.mean(0), bf.mean(0))
    assert np.allclose(af.mean(), bf.mean())
    return
def find_centroids(
    centroids, data, labels, pairwise_distances, zero_point, C
):
    # Get the number of points associated with each centroid
    counts = np.bincount(labels, minlength=C)
    # More bincounts using the positions as weights produce the unnormalized
    # updated centroid locations (we have to do each dimension separately
    # since a weight cannot be a vector)
    for idx in range(data.shape[1]):
        centroids[:, idx] = np.bincount(
            labels, weights=data[:, idx], minlength=C
        )
    # It would have been nice if NumPy offered a combined amin/argmin to
    # avoid iterating over pairwise_distances twice
    distance_sum = np.sum(np.amin(pairwise_distances, axis=1))
    # To avoid introducing divide-by-zero errors, if a centroid has no
    # weight we do no normalization; this keeps its coordinates defined.
    counts = np.maximum(counts, np.ones((1,), dtype=np.uint64))
    centroids /= counts[:, np.newaxis]
    return distance_sum
def test():
    height = 10
    width = 10
    grid = lg.zeros((height + 2, width + 2), np.float32)
    grid[:, 0] = -273.15
    grid[:, -1] = -273.15
    grid[-1, :] = -273.15
    grid[0, :] = 40.0
    center = grid[1:-1, 1:-1]
    north = grid[0:-2, 1:-1]
    east = grid[1:-1, 2:]
    west = grid[1:-1, 0:-2]
    south = grid[2:, 1:-1]
    for i in range(2):
        average = center + north + east + west + south
        work = 0.2 * average
        delta = lg.sum(lg.absolute(work - center))
        center[:] = work
    npGrid = np.zeros((height + 2, width + 2), np.float32)
    npGrid[:, 0] = -273.15
    npGrid[:, -1] = -273.15
    npGrid[-1, :] = -273.15
    npGrid[0, :] = 40.0
    npcenter = npGrid[1:-1, 1:-1]
    npnorth = npGrid[0:-2, 1:-1]
    npeast = npGrid[1:-1, 2:]
    npwest = npGrid[1:-1, 0:-2]
    npsouth = npGrid[2:, 1:-1]
    for i in range(2):
        npaverage = npcenter + npnorth + npeast + npwest + npsouth
        npwork = 0.2 * npaverage
        nptemp = np.absolute(npwork - npcenter)
        npdelta = np.sum(nptemp)
        npcenter[:] = npwork
    assert np.allclose(delta, npdelta)
    return
def linear_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))
    weights = np.zeros(features.shape[1], dtype=T)
    for step in range(steps):
        scores = np.dot(features, weights)
        error = scores - target
        gradient = -(1.0 / len(features)) * error.dot(features)
        weights += learning_rate * gradient
        if step % sample == 0:
            print(
                "Error of step "
                + str(step)
                + ": "
                + str(np.sum(np.power(error, 2)))
            )
    return weights
def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing):
    start = datetime.datetime.now()
    WLSTM = np.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / np.sqrt(word_size + hidden_size)
    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size
    dHout = np.random.randn(n, b, d)
    IFOGf = np.random.randn(n, b, d * 4)
    C = np.random.randn(n, b, d)
    Ct = np.random.randn(n, b, d)
    Hin = np.random.randn(n, b, xphpb)
    dIFOG = np.zeros((n, b, d * 4))
    dIFOGf = np.zeros(IFOGf.shape)
    dHin = np.zeros(Hin.shape)
    dC = np.zeros(C.shape)
    dh0 = np.zeros((1, d))
    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d : 3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first, then continue backprop
        dC[t] += (1 - tanhCt ** 2) * (IFOGf[t, :, 2 * d : 3 * d] * dHout[t])
        if t > 0:
            dIFOGf[t, :, d : 2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d : 2 * d] * dC[t]
        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d :] * dC[t]
        dIFOGf[t, :, 3 * d :] = IFOGf[t, :, :d] * dC[t]
        # backprop activation functions
        dIFOG[t, :, 3 * d :] = (
            1 - IFOGf[t, :, 3 * d :] ** 2
        ) * dIFOGf[t, :, 3 * d :]
        y = IFOGf[t, :, : 3 * d]
        dIFOG[t, :, : 3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, : 3 * d]
        # backprop matrix multiply
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())
        # backprop the identity transforms into Hin
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, word_size:]
        else:
            dh0[0] += np.sum(dHin[t, :, word_size:], 0)
    # Do a little sum to synchronize and check for NaNs
    total = np.sum(dh0)
    assert not math.isnan(total)
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(total) + " ms")
    return total
def log_likelihood(features, target, weights):
    scores = np.dot(features, weights)
    return np.sum(target * scores - np.log(1.0 + np.exp(scores)))
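
# log_likelihood is the objective maximized by logistic regression. A
# minimal training-loop sketch that uses it for periodic reporting follows,
# mirroring the linear_regression loop above; the logistic_regression name
# and its parameters are illustrative assumptions.
def logistic_regression(features, target, steps, learning_rate, sample):
    weights = np.zeros(features.shape[1])
    for step in range(steps):
        scores = np.dot(features, weights)
        predictions = 1.0 / (1.0 + np.exp(-scores))
        # Gradient ascent on the log-likelihood
        gradient = np.dot(features.T, target - predictions)
        weights += learning_rate * gradient
        if step % sample == 0:
            print(
                "Log-likelihood of step "
                + str(step)
                + ": "
                + str(log_likelihood(features, target, weights))
            )
    return weights
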
def test():
    word_size = 10
    hidden_size = 10
    sentence_length = 5
    batch_size = 3
    lg.random.seed(42)
    WLSTM = lg.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / lg.sqrt(word_size + hidden_size)
    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size
    dHout = lg.random.randn(n, b, d)
    IFOGf = lg.random.randn(n, b, d * 4)
    C = lg.random.randn(n, b, d)
    Ct = lg.random.randn(n, b, d)
    Hin = lg.random.randn(n, b, xphpb)
    dIFOG = lg.zeros((n, b, d * 4))
    dIFOGf = lg.zeros(IFOGf.shape)
    dHin = lg.zeros(Hin.shape)
    dC = lg.zeros(C.shape)
    dh0 = lg.zeros((1, d))
    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d : 3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first, then continue backprop
        dC[t] += (1 - tanhCt ** 2) * (IFOGf[t, :, 2 * d : 3 * d] * dHout[t])
        if t > 0:
            dIFOGf[t, :, d : 2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d : 2 * d] * dC[t]
        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d :] * dC[t]
        dIFOGf[t, :, 3 * d :] = IFOGf[t, :, :d] * dC[t]
        # backprop activation functions
        dIFOG[t, :, 3 * d :] = (
            1 - IFOGf[t, :, 3 * d :] ** 2
        ) * dIFOGf[t, :, 3 * d :]
        y = IFOGf[t, :, : 3 * d]
        dIFOG[t, :, : 3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, : 3 * d]
        # backprop matrix multiply
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())
        # backprop the identity transforms into Hin
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, word_size:]
        else:
            dh0[0] += lg.sum(dHin[t, :, word_size:], 0)
    # Repeat the same computation with vanilla NumPy for comparison
    np.random.seed(42)
    WLSTM = np.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / np.sqrt(word_size + hidden_size)
    xphpb = WLSTM.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size
    dHout = np.random.randn(n, b, d)
    IFOGf = np.random.randn(n, b, d * 4)
    C = np.random.randn(n, b, d)
    Ct = np.random.randn(n, b, d)
    Hin = np.random.randn(n, b, xphpb)
    dIFOG = np.zeros((n, b, d * 4))
    dIFOGf = np.zeros(IFOGf.shape)
    dHin = np.zeros(Hin.shape)
    dC = np.zeros(C.shape)
    dhnp0 = np.zeros((1, d))
    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d : 3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first, then continue backprop
        dC[t] += (1 - tanhCt ** 2) * (IFOGf[t, :, 2 * d : 3 * d] * dHout[t])
        if t > 0:
            dIFOGf[t, :, d : 2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d : 2 * d] * dC[t]
        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d :] * dC[t]
        dIFOGf[t, :, 3 * d :] = IFOGf[t, :, :d] * dC[t]
        # backprop activation functions
        dIFOG[t, :, 3 * d :] = (
            1 - IFOGf[t, :, 3 * d :] ** 2
        ) * dIFOGf[t, :, 3 * d :]
        y = IFOGf[t, :, : 3 * d]
        dIFOG[t, :, : 3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, : 3 * d]
        # backprop matrix multiply
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())
        # backprop the identity transforms into Hin
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, word_size:]
        else:
            dhnp0[0] += np.sum(dHin[t, :, word_size:], 0)
    assert np.allclose(dh0[0], dhnp0[0])
def test():
    word_size = 10
    hidden_size = 10
    sentence_length = 5
    batch_size = 3
    np.random.seed(42)
    WLSTM_np = np.random.randn(
        word_size + hidden_size, 4 * hidden_size
    ) / np.sqrt(word_size + hidden_size)
    xphpb = WLSTM_np.shape[0]
    d = hidden_size
    n = sentence_length
    b = batch_size
    WLSTM_lg = lg.array(WLSTM_np)
    dHout_np = np.random.randn(n, b, d)
    IFOGf_np = np.random.randn(n, b, d * 4)
    C_np = np.random.randn(n, b, d)
    Ct_np = np.random.randn(n, b, d)
    Hin_np = np.random.randn(n, b, xphpb)
    dIFOG_np = np.zeros((n, b, d * 4))
    dIFOGf_np = np.zeros(IFOGf_np.shape)
    dHin_np = np.zeros(Hin_np.shape)
    dC_np = np.zeros(C_np.shape)
    dh0_np = np.zeros((1, d))
    dHout_lg = lg.array(dHout_np)
    IFOGf_lg = lg.array(IFOGf_np)
    C_lg = lg.array(C_np)
    Ct_lg = lg.array(Ct_np)
    Hin_lg = lg.array(Hin_np)
    dIFOG_lg = lg.zeros((n, b, d * 4))
    dIFOGf_lg = lg.zeros(IFOGf_lg.shape)
    dHin_lg = lg.zeros(Hin_lg.shape)
    dC_lg = lg.zeros(C_lg.shape)
    dh0_lg = lg.zeros((1, d))
    for t in reversed(range(n)):
        tanhCt_np = Ct_np[t]
        tanhCt_lg = Ct_lg[t]
        # assert lg.allclose(tanhCt_np, tanhCt_lg)
        dIFOGf_np[t, :, 2 * d : 3 * d] = tanhCt_np * dHout_np[t]
        dIFOGf_lg[t, :, 2 * d : 3 * d] = tanhCt_lg * dHout_lg[t]
        # assert lg.allclose(dIFOGf_np[t,:,2*d:3*d], dIFOGf_lg[t,:,2*d:3*d])
        # backprop tanh non-linearity first then continue backprop
        dC_np[t] += (1 - tanhCt_np ** 2) * (
            IFOGf_np[t, :, 2 * d : 3 * d] * dHout_np[t]
        )
        dC_lg[t] += (1 - tanhCt_lg ** 2) * (
            IFOGf_lg[t, :, 2 * d : 3 * d] * dHout_lg[t]
        )
        # assert lg.allclose(dC_np[t], dC_lg[t])
        if t > 0:
            dIFOGf_np[t, :, d : 2 * d] = C_np[t - 1] * dC_np[t]
            dIFOGf_lg[t, :, d : 2 * d] = C_lg[t - 1] * dC_lg[t]
            # assert lg.allclose(dIFOGf_np[t,:,d:2*d], dIFOGf_lg[t,:,d:2*d])
            dC_np[t - 1] += IFOGf_np[t, :, d : 2 * d] * dC_np[t]
            dC_lg[t - 1] += IFOGf_lg[t, :, d : 2 * d] * dC_lg[t]
            # assert lg.allclose(dC_np[t-1], dC_lg[t-1])
        dIFOGf_np[t, :, :d] = IFOGf_np[t, :, 3 * d :] * dC_np[t]
        dIFOGf_lg[t, :, :d] = IFOGf_lg[t, :, 3 * d :] * dC_lg[t]
        # assert lg.allclose(dIFOGf_np[t,:,:d], dIFOGf_lg[t,:,:d])
        dIFOGf_np[t, :, 3 * d :] = IFOGf_np[t, :, :d] * dC_np[t]
        dIFOGf_lg[t, :, 3 * d :] = IFOGf_lg[t, :, :d] * dC_lg[t]
        # assert lg.allclose(dIFOGf_np, dIFOGf_lg)
        # backprop activation functions
        dIFOG_np[t, :, 3 * d :] = (
            1 - IFOGf_np[t, :, 3 * d :] ** 2
        ) * dIFOGf_np[t, :, 3 * d :]
        dIFOG_lg[t, :, 3 * d :] = (
            1 - IFOGf_lg[t, :, 3 * d :] ** 2
        ) * dIFOGf_lg[t, :, 3 * d :]
        # assert lg.allclose(dIFOG_np[t,:,3*d:], dIFOG_lg[t,:,3*d:])
        y_np = IFOGf_np[t, :, : 3 * d]
        y_lg = IFOGf_lg[t, :, : 3 * d]
        # assert lg.allclose(y_np, y_lg)
        dIFOG_np[t, :, : 3 * d] = (y_np * (1.0 - y_np)) * dIFOGf_np[
            t, :, : 3 * d
        ]
        dIFOG_lg[t, :, : 3 * d] = (y_lg * (1.0 - y_lg)) * dIFOGf_lg[
            t, :, : 3 * d
        ]
        # assert lg.allclose(dIFOG_np[t,:,:3*d], dIFOG_lg[t,:,:3*d])
        # backprop matrix multiply
        dHin_np[t] = dIFOG_np[t].dot(WLSTM_np.transpose())
        dHin_lg[t] = dIFOG_lg[t].dot(WLSTM_lg.transpose())
        # assert lg.allclose(dHin_np[t], dHin_lg[t])
        # backprop the identity transforms into Hin
        if t > 0:
            dHout_np[t - 1, :] += dHin_np[t, :, word_size:]
            dHout_lg[t - 1, :] += dHin_lg[t, :, word_size:]
            # assert lg.allclose(dHout_np[t-1,:], dHout_lg[t-1,:])
        else:
            dh0_np[0] += np.sum(dHin_np[t, :, word_size:], 0)
            dh0_lg[0] += lg.sum(dHin_lg[t, :, word_size:], 0)
    # Check this one at the end
    # print(dh0_np[0])
    # print(dh0_lg[0])
    assert np.allclose(dh0_np[0], dh0_lg[0])
def test():
    x = lg.array([])
    r = lg.sum(x)
    assert r == 0

    x = lg.array([1])
    r = lg.sum(x)
    assert r == 1

    x = lg.eye(3)
    r = lg.sum(x)
    assert r == 3

    x = lg.array([1, 2, 3, 4.0])
    r = lg.sum(x)
    r2 = lg.add.reduce(x)
    assert r == r2 == 10

    x = lg.array([1, 2, 3, 4.0, 5.0])
    r = lg.prod(x)
    r2 = lg.multiply.reduce(x)
    assert r == r2 == 120

    asserts.assert_equal(lg.sum([]), np.sum([]))
    asserts.assert_equal(lg.add.reduce([]), np.add.reduce([]))
    asserts.assert_equal(lg.sum([[], []]), np.sum([[], []]))
    asserts.assert_equal(lg.add.reduce([[], []]), np.add.reduce([[], []]))

    asserts.assert_equal(lg.sum(lg.array([0])), np.sum(np.array([0])))
    asserts.assert_equal(
        lg.add.reduce(lg.array([0])), np.add.reduce(np.array([0]))
    )
    asserts.assert_equal(lg.sum([1]), np.sum([1]))
    asserts.assert_equal(lg.add.reduce([1]), np.add.reduce([1]))
    asserts.assert_equal(lg.sum(0), np.sum(0))
    asserts.assert_equal(lg.add.reduce(0), np.add.reduce(0))
    asserts.assert_equal(lg.sum(1), np.sum(1))
    asserts.assert_equal(lg.add.reduce(1), np.add.reduce(1))

    x = lg.array([1, 0, 2, -1, 0, 0, 8])
    x_np = np.array([1, 0, 2, -1, 0, 0, 8])
    asserts.assert_equal(lg.sum(x), np.sum(x_np))
    asserts.assert_equal(lg.add.reduce(x), np.add.reduce(x_np))

    x = lg.array([[0, 1, 0], [2, 0, 3]])
    x_np = np.array([[0, 1, 0], [2, 0, 3]])
    asserts.assert_equal(lg.sum(x), np.sum(x_np))
    asserts.assert_equal(lg.add.reduce(x), np.add.reduce(x_np))

    x = lg.eye(3)
    x_np = np.eye(3)
    asserts.assert_equal(lg.sum(x), np.sum(x_np))
    asserts.assert_equal(lg.add.reduce(x), np.add.reduce(x_np))

    x = lg.array(
        [
            [[0, 1], [1, 1], [7, 0], [1, 0], [0, 1]],
            [[3, 0], [0, 3], [0, 0], [2, 2], [0, 19]],
        ]
    )
    x_np = np.array(
        [
            [[0, 1], [1, 1], [7, 0], [1, 0], [0, 1]],
            [[3, 0], [0, 3], [0, 0], [2, 2], [0, 19]],
        ]
    )
    asserts.assert_equal(lg.sum(x, axis=0), np.sum(x_np, axis=0))
    asserts.assert_equal(lg.sum(x, axis=1), np.sum(x_np, axis=1))
    asserts.assert_equal(lg.sum(x, axis=2), np.sum(x_np, axis=2))
    asserts.assert_equal(lg.add.reduce(x, axis=0), np.add.reduce(x_np, axis=0))
    asserts.assert_equal(lg.add.reduce(x, axis=1), np.add.reduce(x_np, axis=1))
    asserts.assert_equal(lg.add.reduce(x, axis=2), np.add.reduce(x_np, axis=2))
    asserts.assert_equal(lg.sum(x), np.sum(x_np))
    asserts.assert_equal(lg.add.reduce(x), np.add.reduce(x_np))

    x_np = np.concatenate((x_np,) * 2000, axis=1)
    x = lg.array(x_np)
    asserts.assert_equal(lg.sum(x, axis=0), np.sum(x_np, axis=0))
    asserts.assert_equal(lg.sum(x, axis=1), np.sum(x_np, axis=1))
    asserts.assert_equal(lg.sum(x, axis=2), np.sum(x_np, axis=2))
    asserts.assert_equal(lg.sum(x), np.sum(x_np))
    asserts.assert_equal(lg.add.reduce(x, axis=0), np.add.reduce(x_np, axis=0))
    asserts.assert_equal(lg.add.reduce(x, axis=1), np.add.reduce(x_np, axis=1))
    asserts.assert_equal(lg.add.reduce(x, axis=2), np.add.reduce(x_np, axis=2))
    asserts.assert_equal(lg.add.reduce(x), np.add.reduce(x_np))

    x_np = np.random.randn(100)
    indices = np.random.choice(
        np.arange(x_np.size), replace=False, size=int(x_np.size * 0.2)
    )
    x_np[indices] = 0
    x = lg.array(x_np)
    asserts.assert_allclose(lg.sum(x), np.sum(x_np))
    asserts.assert_allclose(lg.add.reduce(x), np.add.reduce(x_np))

    x_np = x_np.reshape(10, 10)
    x = lg.array(x_np)
    asserts.assert_allclose(lg.sum(x), np.sum(x_np))
    asserts.assert_allclose(lg.add.reduce(x), np.add.reduce(x_np))
    return