def __call__(self, A, Y, rng=None, E=None):
    from nengo_extras.convnet import softmax
    import scipy.optimize

    tstart = time.time()

    assert A.shape[0] == Y.shape[0]
    m, n = A.shape
    _, d = Y.shape
    Xshape = (n, d)

    # regularization
    sigma = self.reg * A.max()
    lamb = m * sigma**2

    # --- initialization
    X0 = np.zeros(Xshape)
    # X0 = rng.normal(scale=1./m, size=Xshape)
    # X0, _ = nengo.solvers.LstsqL2(reg=self.reg)(A, Y, rng=rng, E=E)

    # --- solve with L-BFGS
    yi = Y.argmax(axis=1)  # integer class labels from one-hot targets
    mi = np.arange(m)

    def f_df(x):
        X = x.reshape(Xshape)
        Yest = softmax(np.dot(A, X), axis=1)
        cost = -np.log(np.maximum(Yest[mi, yi], 1e-16)).sum()
        # gradient of softmax cross-entropy w.r.t. the logits is Yest - Y
        E = Yest - Y
        grad = np.dot(A.T, E)
        if lamb > 0:
            cost += 0.5 * lamb * (X**2).sum()
            grad += lamb * X
        return cost, grad.ravel()

    x0 = X0.ravel()
    x, mincost, info = scipy.optimize.fmin_l_bfgs_b(
        f_df, x0, maxfun=self.n_epochs, iprint=self.verbose)

    t = time.time() - tstart

    X = x.reshape(Xshape)
    return self.mul_encoders(X, E), {
        'rmses': npext.rms(softmax(np.dot(A, X), axis=1) - Y, axis=1),
        'time': t,
        'iterations': info['funcalls'],
    }
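# A quick gradient check for the f_df above -- a hedged, standalone sketch
# (made-up sizes, random data), not part of the solver. It replicates the
# analytic gradient A.T.dot(Yest - Y) + lamb*X and compares it against a
# finite-difference estimate with scipy.optimize.check_grad.
import numpy as np
import scipy.optimize
from nengo_extras.convnet import softmax

check_rng = np.random.RandomState(0)
cm, cn, cd = 50, 20, 10  # hypothetical problem sizes
cA = check_rng.normal(size=(cm, cn))
cY = np.eye(cd)[check_rng.randint(cd, size=cm)]  # one-hot targets
cyi = cY.argmax(axis=1)
cmi = np.arange(cm)
clamb = 0.1

def check_f(x):
    X = x.reshape(cn, cd)
    Yest = softmax(np.dot(cA, X), axis=1)
    cost = -np.log(np.maximum(Yest[cmi, cyi], 1e-16)).sum()
    return cost + 0.5 * clamb * (X**2).sum()

def check_df(x):
    X = x.reshape(cn, cd)
    E = softmax(np.dot(cA, X), axis=1) - cY
    return (np.dot(cA.T, E) + clamb * X).ravel()

x0 = check_rng.normal(scale=0.1, size=cn * cd)
# should be tiny relative to the gradient norm (roughly 1e-6..1e-4)
print(scipy.optimize.check_grad(check_f, check_df, x0))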
def solve_Softmax(solver, queue, clA, Y, rng=None, E=None):
    from nengo_extras.convnet import softmax
    import scipy.optimize
    import pyopencl_blas
    pyopencl_blas.setup()

    tstart = time.time()

    assert clA.shape[0] == Y.shape[0]
    m, n = clA.shape
    _, d = Y.shape
    Xshape = (n, d)

    # regularization
    sigma = solver.reg * cl.array.max(clA).get()
    lamb = m * sigma**2

    # --- initialization
    # X0 = np.zeros(Xshape, dtype=np.float32)
    X0 = np.zeros(Xshape, dtype=np.float64)

    # --- solve with L-BFGS
    clY = cl.array.to_device(queue, Y.astype(np.float32))
    clyi = cl.array.to_device(queue, np.argmax(Y, axis=1).astype(np.int32))
    clX = cl.array.Array(queue, (n, d), dtype=np.float32)
    clE = cl.array.Array(queue, (m, d), dtype=np.float32)
    clG = cl.array.Array(queue, (n, d), dtype=np.float32)

    softmax_plan = plan_softmax(queue, clE, clE)  # in-place softmax on clE

    # sum_square = cl.reduction.ReductionKernel(
    #     queue.context, np.float32, neutral="0",
    #     reduce_expr="a+b", map_expr="x[i]*x[i]",
    #     arguments="__global float *x")

    # negative log-likelihood of the correct class, summed over examples
    sum_logloss = cl.reduction.ReductionKernel(
        queue.context, np.float32, neutral="0", reduce_expr="a+b",
        map_expr="-log(max(Y[i*%(d)d + yi[i]], 1e-16f))" % dict(d=d),
        arguments="__global const int *yi, __global const float *Y")
    assert clE.elemstrides[0] == d  # contiguous rows, as sum_logloss assumes

    def f_df(x):
        clX.set(x.astype(np.float32).reshape(Xshape))
        pyopencl_blas.gemm(queue, clA, clX, clE)  # clE <- A X (logits)
        softmax_plan()
        cost = sum_logloss(clyi, clE).get()
        clE[:] -= clY  # error: softmax(A X) - Y
        pyopencl_blas.gemm(queue, clA, clE, clG, transA=True)  # clG <- A.T E
        if lamb > 0:
            cost += 0.5 * lamb * cl.array.sum(clX**2).get()
            # cost += 0.5 * lamb * sum_square(clX).get()
            clG[:] += lamb * clX
        # L-BFGS works in float64; all device math is float32
        G = clG.get().astype(np.float64)
        return cost, G.ravel()

    x0 = X0.ravel()
    x, mincost, info = scipy.optimize.fmin_l_bfgs_b(
        f_df, x0, maxfun=solver.n_epochs, iprint=solver.verbose)

    tend = time.time()

    A = clA.get()
    X = x.reshape(Xshape)
    return solver.mul_encoders(X, E), {
        'rmses': npext.rms(softmax(np.dot(A, X), axis=1) - Y, axis=1),
        'time': tend - tstart,
    }
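# A hedged debugging snippet one might drop into solve_Softmax just before the
# fmin_l_bfgs_b call (it needs the function's locals: Y, n, d, m, Xshape, lamb,
# clA, f_df, and the locally imported softmax). One device evaluation of the
# objective and gradient should agree with a float64 NumPy reference to roughly
# float32 precision, since the GEMMs and reductions run in float32 on the device.
A_host = clA.get().astype(np.float64)
x_test = np.random.RandomState(1).normal(scale=0.1, size=n * d)

cost_cl, grad_cl = f_df(x_test)

Z = softmax(np.dot(A_host, x_test.reshape(Xshape)), axis=1)
cost_np = -np.log(np.maximum(Z[np.arange(m), Y.argmax(axis=1)], 1e-16)).sum()
cost_np += 0.5 * lamb * (x_test**2).sum()
grad_np = (np.dot(A_host.T, Z - Y) + lamb * x_test.reshape(Xshape)).ravel()

print(abs(cost_cl - cost_np) / max(abs(cost_np), 1.0))  # expect ~1e-6..1e-4
print(np.abs(grad_cl - grad_np).max())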
spike_names = ['layer 1', 'layer 5', 'layer 7']

n_presentations = 5
# n_presentations = 4
# n_presentations = 2
# n_presentations = 1

with nengo_ocl.Simulator(model) as sim:
    sim.run(n_presentations * presentation_time)

t = sim.trange()
nt = int(presentation_time / sim.dt)
ct = int(c0 / sim.dt)
n_classes = ccnet.output.size_out
blocks = sim.data[output_p].reshape(n_presentations, nt, n_classes)
values = softmax(blocks[:, ct:, :].mean(axis=1), axis=1)
choices = np.argsort(values, axis=1)[:, ::-1]

spike_blocks = [
    sim.data[spike_p].reshape(n_presentations, nt, -1)
    for spike_p in spike_ps]

plt.figure(figsize=(6.4, 7))
rows = 2 + len(spike_ps)
cols = n_presentations

neuron_inds = [rng.permutation(block.shape[-1])[:40] for block in spike_blocks]

for col in range(cols):
    label0 = label_names[Ytest[col]]
    tj = sim.dt * np.arange(nt) + col * presentation_time
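# The first column of `choices` is the network's top-1 class for each
# presentation. A small accuracy sketch over the presented images, assuming
# (as in the loop above) that Ytest holds integer class labels in presentation
# order, and that there are at least five classes for the top-5 figure:
labels = np.asarray(Ytest[:n_presentations])
top1 = choices[:, 0]
top5 = choices[:, :5]
print("top-1 accuracy: %0.3f" % (top1 == labels).mean())
print("top-5 accuracy: %0.3f" % (top5 == labels[:, None]).any(axis=1).mean())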
def __call__(self, A, Y, rng=None, E=None, X=None):
    from nengo_extras.convnet import softmax

    assert E is None
    assert A.ndim == Y.ndim == 2 and A.shape[0] == Y.shape[0]
    m = A.shape[0]
    Xshape = (A.shape[1], Y.shape[1])
    batch_size = self.batch_size

    # regularization
    sigma = self.reg * A.max()
    lamb = batch_size * sigma**2
    print("sigma^2: %s" % sigma**2)

    tstart = time.time()
    Y = self.mul_encoders(Y, E)

    # --- solve with SGD
    Yi = Y.argmax(axis=1)  # integer class labels from one-hot targets
    eta = self.eta
    momentum = self.momentum
    bi = np.arange(batch_size)

    def batches():
        for i in range(m // batch_size):
            r = range(i*batch_size, (i+1)*batch_size)
            yield A[r], Yi[r]

    def f_df(a, X, yi):
        yest = softmax(np.dot(a, X), axis=1)
        cost = -np.log(np.maximum(yest[bi, yi], 1e-16)).sum()
        e = yest  # note: can copy if yest is needed later
        e[bi, yi] -= 1  # gradient of the log-loss w.r.t. the logits
        grad = np.dot(a.T, e)
        if lamb > 0:
            cost += 0.5 * lamb * (X**2).sum()
            grad += lamb * X
        return cost, grad

    X = rng.normal(scale=1./m, size=Xshape) if X is None else X.copy()
    V = np.zeros_like(X)  # momentum "velocity"
    for i_epoch in range(self.n_epochs):
        epoch_cost = 0
        for a, yi in batches():
            mu = abs(momentum)
            if mu > 0:
                V *= mu
            # negative momentum selects a Nesterov-style lookahead point
            X2 = X - eta*V if momentum < 0 else X
            cost, grad = f_df(a, X2, yi)
            epoch_cost += cost
            if mu > 0:
                V += grad
                X -= eta * V
            else:
                X -= eta * grad

        if self.verbose >= 1:
            print("Epoch %3d: %0.2e" % (i_epoch, epoch_cost))

    t = time.time() - tstart
    return X, {
        'rmses': npext.rms(softmax(np.dot(A, X), axis=1) - Y, axis=1),
        'time': t,
        'iterations': self.n_epochs,
    }
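# The sign of `momentum` above selects the update rule: a positive value gives
# classical momentum (gradient evaluated at X), a negative value gives a
# Nesterov-style lookahead (gradient evaluated at X - eta*V, with mu =
# abs(momentum)). A minimal 1-D sketch of the same update order on
# f(x) = 0.5 * x**2, whose gradient is just x:
def momentum_demo(nesterov, eta=0.1, mu=0.9, steps=200):
    x, v = 5.0, 0.0
    for _ in range(steps):
        v *= mu                              # decay the velocity first
        x2 = x - eta * v if nesterov else x  # lookahead point, as in the solver
        v += x2                              # accumulate grad f(x2) = x2
        x -= eta * v
    return x

print(momentum_demo(nesterov=False), momentum_demo(nesterov=True))  # both decay toward 0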