def alg2_VT(self, Qx, Qxprime, M):  # K1, K2, K3
    B = T.outer(Qx, Qxprime)
    C = T.sqrt(B)  # in R^{n*n}
    D = M / C  # this is lambda in the ReLU analytical formula
    E = T.clip(D, -1, 1)  # clip E to [-1, 1] for numerical stability
    F = (1 / (2 * np.pi)) * (E * (np.pi - T.arccos(E)) + T.sqrt(1 - E ** 2)) * C
    G = (np.pi - T.arccos(E)) / (2 * np.pi)
    return F, G
def VT(self, M):
    A = T.diag(M)  # M is in R^{n*n}, holding the output GP kernel
    # of all pairs of data in the data set
    B = A * A[:, None]
    C = T.sqrt(B)  # in R^{n*n}
    D = M / C  # this is lambda in the ReLU analytical formula
    E = T.clip(D, -1, 1)  # clip E to [-1, 1] for numerical stability
    F = (1 / (2 * np.pi)) * (E * (np.pi - T.arccos(E)) + T.sqrt(1 - E ** 2)) * C
    G = (np.pi - T.arccos(E)) / (2 * np.pi)
    return F, G
def alg1_VT_dep(self, M):  # here M is used as the previous little q; NxN, same value for every row
    A = T.diag(M)  # M is in R^{n*n}, holding the output GP kernel
    # of all pairs of data in the data set
    B = A * A[:, None]
    C = T.sqrt(B)  # in R^{n*n}
    D = M / C  # this is lambda in the ReLU analytical formula (c in the algorithm)
    E = T.clip(D, -1, 1)  # clip E to [-1, 1] for numerical stability
    F = (1 / (2 * np.pi)) * (E * (np.pi - T.arccos(E)) + T.sqrt(1 - E ** 2)) * C
    G = (np.pi - T.arccos(E)) / (2 * np.pi)
    return F, G
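# A minimal NumPy sketch (not part of the original code) illustrating the ReLU
# arc-cosine formula shared by the three VT variants above: given a PSD Gram
# matrix M, lambda = M / sqrt(diag(M) diag(M)^T), then
#   F = C/(2*pi) * (lambda*(pi - arccos(lambda)) + sqrt(1 - lambda^2))
#   G = (pi - arccos(lambda)) / (2*pi).
import numpy as np


def relu_vt_reference(M):
    a = np.diag(M)
    C = np.sqrt(a * a[:, None])            # sqrt(diag(M) diag(M)^T)
    E = np.clip(M / C, -1, 1)              # cosine of the pairwise angles
    F = C / (2 * np.pi) * (E * (np.pi - np.arccos(E)) + np.sqrt(1 - E ** 2))
    G = (np.pi - np.arccos(E)) / (2 * np.pi)
    return F, G


# usage on a random PSD matrix
X = np.random.randn(4, 3)
F, G = relu_vt_reference(X @ X.T)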
def forward(self, input, deterministic=None):
    if deterministic is None:
        deterministic = self.deterministic
    dirac = T.cast(deterministic, 'float32')

    # batch statistics
    self.mean = T.mean(input, self.axis, keepdims=True)
    self.var = T.var(input, self.axis, keepdims=True)

    # create the moving-average statistics (and their updates) only once
    if len(self.updates.keys()) == 0:
        self.avgmean, upm, step = T.ExponentialMovingAverage(
            self.mean, self.beta1)
        self.avgvar, upv, step = T.ExponentialMovingAverage(
            self.var, self.beta2, step=step,
            init=numpy.ones(self.var.shape).astype('float32'))
        self.add_variable(self.avgmean)
        self.add_variable(self.avgvar)
        self.add_update(upm)
        self.add_update(upv)

    # use batch statistics at training time, moving averages at test time
    self.usemean = self.mean * (1 - dirac) + self.avgmean * dirac
    self.usevar = self.var * (1 - dirac) + self.avgvar * dirac
    return self.W * (input - self.usemean) / \
        (T.sqrt(self.usevar) + self.const) + self.b
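# A minimal NumPy sketch (not from the original code) of the statistic
# selection done above: at training time (deterministic=0) the batch mean/var
# are used, at test time (deterministic=1) the moving averages are. The
# normalization axis and const=1e-4 are assumptions for illustration.
import numpy as np


def batch_norm_reference(x, avgmean, avgvar, W, b, deterministic, const=1e-4):
    dirac = float(deterministic)
    mean, var = x.mean(0, keepdims=True), x.var(0, keepdims=True)
    usemean = mean * (1 - dirac) + avgmean * dirac
    usevar = var * (1 - dirac) + avgvar * dirac
    return W * (x - usemean) / (np.sqrt(usevar) + const) + b


x = np.random.randn(32, 10)
y = batch_norm_reference(x, avgmean=np.zeros(10), avgvar=np.ones(10),
                         W=1.0, b=0.0, deterministic=0)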
def create_updates(
    self,
    grads_or_loss,
    learning_rate,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-8,
    params=None,
):
    if params is None:
        params = symjax.get_variables(trainable=True)

    grads = self._get_grads(grads_or_loss, params)

    # get the learning rate
    if callable(learning_rate):
        learning_rate = learning_rate()

    updates = dict()
    for param, grad in zip(params, grads):
        m = symjax.nn.schedules.ExponentialMovingAverage(grad, beta1)[0]
        v = symjax.nn.schedules.ExponentialMovingAverage(grad**2, beta2)[0]
        update = m / (tensor.sqrt(v) + epsilon)
        updates[param] = param - learning_rate * update
    self.add_updates(updates)
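# A minimal NumPy sketch (plain arrays, no symbolic graph) of the update rule
# assembled above: exponential moving averages of the gradient and squared
# gradient, then param -= lr * m / (sqrt(v) + eps). Note that, unlike the next
# create_updates, no bias correction is applied here.
import numpy as np


def adam_like_step(param, grad, m, v, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    param = param - lr * m / (np.sqrt(v) + eps)
    return param, m, v


p, m, v = np.ones(3), np.zeros(3), np.zeros(3)
p, m, v = adam_like_step(p, grad=np.array([0.1, -0.2, 0.3]), m=m, v=v, lr=1e-3)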
def create_updates(
    self,
    grads_or_loss,
    learning_rate,
    amsgrad=False,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-7,
    params=None,
):
    if isinstance(grads_or_loss, list):
        assert params

    if params is None:
        params = self._get_variables(grads_or_loss)
    elif type(params) != list:
        raise RuntimeError("given params should be a list")

    if len(params) == 0:
        raise RuntimeError(
            "no parameters are given for the gradients, this can be due to "
            "passing explicitly an empty list or to passing a loss connected "
            "to no trainable weights"
        )

    grads = self._get_grads(grads_or_loss, params)

    local_step = tensor.Variable(1, dtype="int32", trainable=False)
    updates = {local_step: local_step + 1}

    beta_1_t = tensor.power(beta_1, local_step)
    beta_2_t = tensor.power(beta_2, local_step)
    lr = learning_rate * (tensor.sqrt(1 - beta_2_t) / (1 - beta_1_t))

    for param, grad in zip(params, grads):
        m = ExponentialMovingAverage(grad, beta_1, debias=False)[0]
        v = ExponentialMovingAverage(grad**2, beta_2, debias=False)[0]
        if amsgrad:
            v_hat = tensor.Variable(
                tensor.zeros_like(param), name="v_hat", trainable=False)
            updates[v_hat] = tensor.maximum(v_hat, v)
            update = m / (tensor.sqrt(updates[v_hat]) + epsilon)
        else:
            update = m / (tensor.sqrt(v) + epsilon)
        update = tensor.where(local_step == 1, grad, update)
        updates[param] = param - lr * update

    self.add_updates(updates)
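# A minimal NumPy reference (not the library API) of the Adam step assembled
# above: the debiasing is folded into the learning rate via
# lr_t = lr * sqrt(1 - beta_2^t) / (1 - beta_1^t), and AMSGrad optionally
# replaces v by a running elementwise maximum; at the very first step the raw
# gradient is used, mirroring the tensor.where(local_step == 1, ...) line.
import numpy as np


def adam_step(param, grad, m, v, v_hat, t, lr,
              beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=False):
    m = beta_1 * m + (1 - beta_1) * grad
    v = beta_2 * v + (1 - beta_2) * grad ** 2
    lr_t = lr * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    if amsgrad:
        v_hat = np.maximum(v_hat, v)
        update = m / (np.sqrt(v_hat) + epsilon)
    else:
        update = m / (np.sqrt(v) + epsilon)
    update = grad if t == 1 else update   # first-step special case, as above
    return param - lr_t * update, m, v, v_hat


p, m, v, v_hat = np.zeros(3), np.zeros(3), np.zeros(3), np.zeros(3)
p, m, v, v_hat = adam_step(p, np.array([0.1, -0.2, 0.3]), m, v, v_hat,
                           t=1, lr=1e-3)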
def RNTK_relu(x, RNTK_old, GP_old, param, output):
    sw = param["sigmaw"]
    su = param["sigmau"]
    sb = param["sigmab"]
    sv = param["sigmav"]
    a = T.diag(GP_old)  # GP_old is in R^{n*n}, holding the output GP kernel
    # of all pairs of data in the data set
    B = a * a[:, None]
    C = T.sqrt(B)  # in R^{n*n}
    D = GP_old / C  # this is lambda in the ReLU analytical formula
    # clip E to [-1, 1] for numerical stability
    E = T.clip(D, -1, 1)
    F = (1 / (2 * np.pi)) * (E * (np.pi - T.arccos(E)) + T.sqrt(1 - E**2)) * C
    G = (np.pi - T.arccos(E)) / (2 * np.pi)
    if output:
        GP_new = sv**2 * F
        RNTK_new = sv**2.0 * RNTK_old * G + GP_new
    else:
        X = x * x[:, None]
        # m (the input dimension) is assumed to be defined in the enclosing scope
        GP_new = sw**2 * F + (su**2 / m) * X + sb**2
        RNTK_new = sw**2.0 * RNTK_old * G + GP_new
    return RNTK_new, GP_new
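# A minimal NumPy sketch (not the original symbolic version) of one RNTK
# recursion step as performed above; here m (the input dimension) is passed
# explicitly, and `param` holds sigmaw, sigmau, sigmab, sigmav as in the
# symbolic function.
import numpy as np


def rntk_relu_step(x, RNTK_old, GP_old, param, output, m):
    sw, su, sb, sv = (param[k] for k in ("sigmaw", "sigmau", "sigmab", "sigmav"))
    a = np.diag(GP_old)
    C = np.sqrt(a * a[:, None])
    E = np.clip(GP_old / C, -1, 1)
    F = C / (2 * np.pi) * (E * (np.pi - np.arccos(E)) + np.sqrt(1 - E ** 2))
    G = (np.pi - np.arccos(E)) / (2 * np.pi)
    if output:
        GP_new = sv ** 2 * F
        return sv ** 2 * RNTK_old * G + GP_new, GP_new
    GP_new = sw ** 2 * F + (su ** 2 / m) * (x * x[:, None]) + sb ** 2
    return sw ** 2 * RNTK_old * G + GP_new, GP_new


x = np.random.randn(4)
K = np.eye(4) + 0.1
params = {"sigmaw": 1.0, "sigmau": 1.0, "sigmab": 0.1, "sigmav": 1.0}
rntk, gp = rntk_relu_step(x, K, K, params, output=False, m=1)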
def generate_gaussian_filterbank(N, M, J, f0, f1, modes=1):
    # gaussian parameters
    freqs = get_scaled_freqs(f0, f1, J)
    freqs *= (J - 1) * 10

    if modes > 1:
        other_modes = np.random.randint(0, J, J * (modes - 1))
        freqs = np.concatenate([freqs, freqs[other_modes]])

    # create the mean vectors
    mu_init = np.stack([freqs, 0.1 * np.random.randn(J * modes)], 1)
    mu = T.Variable(mu_init.astype('float32'), name='mu')

    # create the covariance matrix
    cor = T.Variable(0.01 * np.random.randn(J * modes).astype('float32'),
                     name='cor')
    sigma_init = np.stack([freqs / 6, 1. + 0.01 * np.random.randn(J * modes)], 1)
    sigma = T.Variable(sigma_init.astype('float32'), name='sigma')

    # create the mixing coefficients
    mixing = T.Variable(np.ones((modes, 1, 1)).astype('float32'))

    # now apply our parametrization
    coeff = T.stop_gradient(T.sqrt((T.abs(sigma) + 0.1).prod(1))) * 0.95
    Id = T.eye(2)
    cov = Id * T.expand_dims((T.abs(sigma) + 0.1), 1) + \
        T.flip(Id, 0) * (T.tanh(cor) * coeff).reshape((-1, 1, 1))
    cov_inv = T.linalg.inv(cov)

    # get the gaussian filters
    time = T.linspace(-5, 5, M)
    freq = T.linspace(0, J * 10, N)
    x, y = T.meshgrid(time, freq)
    grid = T.stack([y.flatten(), x.flatten()], 1)
    centered = grid - T.expand_dims(mu, 1)
    gaussian = T.exp(-(T.matmul(centered, cov_inv) ** 2).sum(-1))
    norm = T.linalg.norm(gaussian, 2, 1, keepdims=True)
    gaussian_2d = T.abs(mixing) * T.reshape(gaussian / norm, (J, modes, N, M))
    return gaussian_2d.sum(1, keepdims=True), mu, cor, sigma, mixing
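# A minimal NumPy sketch (not part of the original code) of the 2x2 covariance
# parametrization used above: the diagonal holds |sigma| + 0.1 and the
# anti-diagonal holds tanh(cor) * 0.95 * sqrt(prod(|sigma| + 0.1)), which keeps
# the determinant strictly positive so the matrix stays invertible.
import numpy as np


def gaussian_cov(sigma, cor):
    diag = np.abs(sigma) + 0.1                 # shape (2,)
    coeff = 0.95 * np.sqrt(diag.prod())
    off = np.tanh(cor) * coeff
    cov = np.diag(diag) + np.flip(np.eye(2), 0) * off
    return cov, np.linalg.inv(cov)


cov, cov_inv = gaussian_cov(np.array([2.0, 1.0]), 0.3)
assert np.linalg.det(cov) > 0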
def create_transform(input, args):
    input_r = input.reshape((args.BS, 1, -1))
    if args.option == 'melspec':
        layer = [
            T.signal.melspectrogram(input_r,
                                    window=args.bins,
                                    hop=args.hop,
                                    n_filter=args.J * args.Q,
                                    low_freq=3,
                                    high_freq=22050,
                                    nyquist=22050,
                                    mode='same')
        ]
    elif args.option == 'raw':
        layer = [
            layers.Conv1D(input_r,
                          strides=args.hop,
                          W_shape=(args.J * args.Q, 1, args.bins),
                          trainable_b=False,
                          pad='SAME')
        ]
        layer.append(layers.Lambda(T.expand_dims(layer[-1], 1), T.abs))
    elif args.option == 'morlet':
        filters = generate_morlet_filterbank(args.bins, args.J, args.Q)
        layer = [
            layers.Conv1D(input_r,
                          args.J * args.Q,
                          args.bins,
                          W=filters.real(),
                          trainable_W=False,
                          stride=args.hop,
                          trainable_b=False,
                          pad='SAME')
        ]
        layer.append(
            layers.Conv1D(input_r,
                          args.J * args.Q,
                          args.bins,
                          W=filters.imag(),
                          trainable_W=False,
                          stride=args.hop,
                          trainable_b=False,
                          pad='SAME'))
        layer.append(T.sqrt(layer[-1]**2 + layer[-2]**2))
        layer.append(T.expand_dims(layer[-1], 1))
    elif args.option == 'learnmorlet':
        filters, freqs, scales = generate_learnmorlet_filterbank(
            args.bins, args.J, args.Q)
        layer = [
            layers.Conv1D(input_r,
                          args.J * args.Q,
                          args.bins,
                          W=T.real(filters),
                          trainable_W=False,
                          stride=args.hop,
                          trainable_b=False,
                          pad='SAME')
        ]
        layer.append(
            layers.Conv1D(input_r,
                          args.J * args.Q,
                          args.bins,
                          W=T.imag(filters),
                          trainable_W=False,
                          stride=args.hop,
                          trainable_b=False,
                          pad='SAME'))
        layer[0].add_variable(freqs)
        layer[0].add_variable(scales)
        layer[0]._filters = filters
        layer[0]._scales = scales
        layer[0]._freqs = freqs
        layer.append(T.sqrt(layer[-1]**2 + layer[-2]**2 + 0.001))
        layer.append(T.expand_dims(layer[-1], 1))
    elif 'wvd' in args.option:
        WVD = T.signal.wvd(input_r,
                           window=args.bins * 2,
                           L=args.L * 2,
                           hop=args.hop,
                           mode='same')
        if args.option == 'wvd':
            modes = 1
        else:
            modes = 3
        filters, mu, cor, sigma, mixing = generate_gaussian_filterbank(
            args.bins, 64, args.J * args.Q, 5, 22050, modes)
        print(WVD)
        # filters = T.random.randn((args.J * args.Q, 1, args.bins * 2, 5))
        wvd = T.convNd(WVD, filters)[:, :, 0]
        print('wvd', wvd)
        layer = [layers.Identity(T.expand_dims(wvd, 1))]
        layer[-1].add_variable(mu)
        layer[-1].add_variable(cor)
        layer[-1].add_variable(sigma)
        layer[-1].add_variable(mixing)
        layer[-1]._mu = mu
        layer[-1]._cor = cor
        layer[-1]._sigma = sigma
        layer[-1]._mixing = mixing
        layer[-1]._filter = filters
        layer.append(layers.Lambda(layer[-1], T.abs))
    elif args.option == 'sinc':
        filters, freq = generate_sinc_filterbank(5, 22050, args.J * args.Q,
                                                 args.bins)
        layer = [
            layers.Conv1D(input.reshape((args.BS, 1, -1)),
                          args.J * args.Q,
                          args.bins,
                          W=filters,
                          stride=args.hop,
                          trainable_b=False,
                          trainable_W=False,
                          pad='SAME')
        ]
        layer[-1]._freq = freq
        layer[-1]._filter = filters
        layer[-1].add_variable(freq)
        layer.append(T.expand_dims(layer[-1], 1))
        layer.append(layers.Lambda(layer[-1], T.abs))
    return layer
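# A minimal NumPy sketch (not the original layer graph) of the pattern used in
# the 'morlet'/'learnmorlet' branches above: convolve the signal with the real
# and imaginary parts of a complex filter separately, then take the modulus
# sqrt(real^2 + imag^2) (with a small constant for a stable gradient). The toy
# filter below is an illustrative assumption, not the library's filterbank.
import numpy as np


def complex_conv_modulus(signal, complex_filter, eps=0.001):
    real = np.convolve(signal, complex_filter.real, mode='same')
    imag = np.convolve(signal, complex_filter.imag, mode='same')
    return np.sqrt(real ** 2 + imag ** 2 + eps)


t = np.linspace(-1, 1, 64)
filt = np.exp(1j * 2 * np.pi * 8 * t) * np.exp(-t ** 2 / 0.1)  # toy Morlet-like filter
energy = complex_conv_modulus(np.random.randn(256), filt)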