def check_grad(self, vector, name, frame):
    # No history yet for this name: accept the sample unconditionally.
    if len(self.data[name]) == 0:
        return True
    last_vect, idx = self.get_last(name)
    # Norm of the finite-difference gradient between the last stored vector
    # and the new one, taken over the frame gap. Uses numpy as np and the
    # project's utils/config modules.
    gradient = np.linalg.norm(utils.grad(vector, last_vect, frame - idx))
    print("************", gradient)  # debug output
    return gradient < config.face_orientation_max_grad
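# A minimal self-contained sketch of the check above, assuming utils.grad(a, b, dt)
# is the finite difference (a - b) / dt; the threshold value and the sample
# vectors here are made up for illustration.
import numpy as np

def finite_diff_grad(vec, last_vec, dframes):
    # per-frame rate of change between two observation vectors
    return (np.asarray(vec) - np.asarray(last_vec)) / dframes

max_grad = 0.5  # hypothetical stand-in for config.face_orientation_max_grad
v_prev = np.array([0.10, 0.20])
v_curr = np.array([0.15, 0.25])
print(np.linalg.norm(finite_diff_grad(v_curr, v_prev, 3)) < max_grad)  # True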
import theano.tensor as tt
import theano.tensor.nlinalg as tn

def run_irl(world, car, reward, theta, data):
    def gen():
        # Load each demonstration point into the shared Theano variables.
        for point in data:
            for c, x0, u in zip(world.cars, point['x0'], point['u']):
                c.traj.x0.set_value(x0)
                for cu, uu in zip(c.traj.u, u):
                    cu.set_value(uu)
            yield
    r = car.traj.reward(reward)
    g = utils.grad(r, car.traj.u)
    H = utils.hessian(r, car.traj.u)
    I = tt.eye(utils.shape(H)[0])
    # Small ridge to keep the Hessian negative definite.
    reg = utils.vector(1)
    reg.set_value([1e-1])
    H = H - reg[0] * I
    # Laplace-approximation objective: g^T H^{-1} g + log det(-H).
    L = tt.dot(g, tt.dot(tn.MatrixInverse()(H), g)) + tt.log(tn.Det()(-H))
    for _ in gen():
        pass
    optimizer = utils.Maximizer(L, [theta], gen=gen, method='gd', eps=0.1,
                                debug=True, iters=1000, inf_ignore=10)
    optimizer.maximize()
    print(theta.get_value())
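# A hedged sketch of the objective run_irl maximizes, evaluated in plain NumPy
# for a toy quadratic reward r(u) = -0.5 * u^T A u + b^T u. Under a Laplace
# approximation, log p(u | theta) is proportional to
# g^T H^{-1} g + log det(-H); A, b, and u below are made-up test values.
import numpy as np

A = np.array([[2.0, 0.3], [0.3, 1.0]])  # hypothetical curvature of the reward
b = np.array([0.5, -0.2])               # hypothetical linear term
u = np.array([0.1, 0.0])                # controls at which we linearize

g = -A @ u + b                          # gradient of r at u
H = -A                                  # Hessian of r (constant for a quadratic)
H_reg = H - 1e-1 * np.eye(2)            # same ridge as run_irl
L = g @ np.linalg.solve(H_reg, g) + np.log(np.linalg.det(-H_reg))
print(L)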
def on_main_button_click(self, sender, sender_name):
    if sender_name != 'OK':
        self.destroy()
        return
    if not (isfloat(self.res['ms1']) and isfloat(self.res['ms2'])):
        return
    # Go through float so inputs like '1.5' do not crash int().
    ms1 = int(float(self.res['ms1']))
    ms2 = int(float(self.res['ms2']))
    # Build the list of subtitles to apply changes to.
    applyItems = []
    if self.res['applyToSubs'] == 'all lines':
        for item in self.subtitleModel.get_model():
            applyItems.append(item[0])
    else:
        self.get_tv_selection()
        if len(self.tvSelectionList) == 0:
            self.destroy()
            return
        applyItems = self.tvSelectionList[:]
    # Original first/last start times (A0, B0) and their shifted targets (A, B).
    A0 = int(applyItems[0].startTime)
    B0 = int(applyItems[-1].startTime)
    A = A0 + ms1 * (1 if self.res['op1'] == 'Add' else -1)
    B = B0 + ms2 * (1 if self.res['op2'] == 'Add' else -1)
    # A degenerate interval would make the linear remapping undefined.
    if A0 == B0:
        return
    for item in applyItems:
        duration = int(item.duration)
        # Linearly remap each start time from [A0, B0] onto [A, B],
        # keeping each subtitle's duration unchanged.
        new_start_time = int(grad(A0, B0, A, B, int(item.startTime)))
        new_stop_time = new_start_time + duration
        self.changeList.append((item, int(item.startTime), int(item.stopTime),
                                new_start_time, new_stop_time))
        item.startTime = new_start_time
        item.stopTime = new_stop_time
    self.destroy()
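# Here `grad` is a linear time remapping, not a derivative: it sends a start
# time t from the original interval [A0, B0] onto [A, B]. A minimal sketch of
# such a helper (the real implementation may differ):
def grad(A0, B0, A, B, t):
    # Affine map with grad(A0) == A and grad(B0) == B; requires A0 != B0,
    # which the caller guards against.
    return A + (t - A0) * (B - A) / (B0 - A0)

# e.g. stretch subtitles that originally span 0..10000 ms onto 500..10500 ms:
print(grad(0, 10000, 500, 10500, 5000))  # 5500.0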
Gradually, the model will find the best combination of weights and bias to minimize loss.
'''

st.code('''
loss_value, gradients = grad(model, features, labels)

# print the initial loss, before any optimisation
print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
                                          loss_value.numpy()))

# apply a single optimisation step
optimizer.apply_gradients(zip(gradients, model.trainable_variables))

print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
                                  loss(model, features, labels).numpy()))
''')

loss_value, gradients = grad(model, features, labels)
st.write(f"Step: {optimizer.iterations.numpy()} \
\nInitial Loss: {loss_value.numpy()}")
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
st.write(f"Step: {optimizer.iterations.numpy()} \
\nLoss: {loss(model, features, labels).numpy()}")

'''
#### Training loop
The model is ready for training. A *training loop* feeds the dataset examples into the ```model``` to help it make better predictions. The following code sets up these *training steps*:
'''
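# A minimal sketch of the training loop the text above refers to, following
# the standard TensorFlow custom-training pattern; `train_dataset`, `model`,
# `grad`, `loss`, and `optimizer` are assumed to come from the earlier steps
# of this page, and `import tensorflow as tf` is assumed at the top.
train_loss_results = []
num_epochs = 201

for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    for x, y in train_dataset:
        # one optimisation step per batch
        loss_value, gradients = grad(model, x, y)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        epoch_loss_avg.update_state(loss_value)
    train_loss_results.append(epoch_loss_avg.result())
    if epoch % 50 == 0:
        st.write(f"Epoch {epoch:03d}: Loss: {epoch_loss_avg.result():.3f}")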
import numpy as np
from scipy.stats import bernoulli

def adiana(X, y, w, arg, f_opt, tol=1e-15, verbose=True):
    '''
    -------------------------
    Implementation of ADIANA method
    -------------------------
    X - data matrix
    y - labels vector
    w - initial point
    arg - class containing all parameters of the method and compressor
    f_opt - optimal function value
    tol - desired tolerance of the solution
    verbose - if True, the function value at each iteration is printed

    return:
    loss - numpy array containing the function value at each iteration
    com_bits - numpy array containing the bits transmitted by one node to the server
    '''
    alg = 'ADIANA'
    dim = X.shape[1]
    omega = compute_omega(dim, arg)
    arg.alpha = 1 / (1 + omega)
    arg.theta_2 = 0.5
    # Step sizes and momentum parameters from the ADIANA analysis;
    # omega == 0 covers the uncompressed case.
    if omega == 0:
        arg.prob = 1
        arg.eta = 0.5 / arg.L
    else:
        arg.prob = min(1, max(0.5 * arg.alpha,
                              0.5 * arg.alpha * (np.sqrt(arg.node / (32 * omega)) - 1)))
        arg.eta = min(0.5 / arg.L,
                      arg.node / (64 * omega * arg.L * ((2 * arg.prob * (omega + 1) + 1) ** 2)))
    arg.theta_1 = min(1 / 4, np.sqrt(arg.eta * arg.lamda / arg.prob))
    arg.gamma = 0.5 * arg.eta / (arg.theta_1 + arg.eta * arg.lamda)
    arg.beta = 1 - arg.gamma * arg.lamda
    if verbose:
        print('algorithm ' + alg + ' starts')
        print('eta = ', arg.eta, 'compression: ', arg.comp_method)
        print('f_opt = ', f_opt)
    num_data = y.shape[0]
    num_data_worker = int(np.floor(num_data / arg.node))
    zk = w
    yk = w
    wk = w
    xk = w
    loss = []
    local_gradx = np.zeros((arg.node, dim))
    local_gradw = np.zeros((arg.node, dim))
    hs = np.zeros((arg.node, dim))
    hs_mean = np.mean(hs, axis=0)
    deltas = np.zeros((arg.node, dim))
    deltasw = np.zeros((arg.node, dim))
    loss_0 = loss_logistic(X, y, yk, arg)
    if verbose:
        print('at iteration 0', 'loss =', loss_0)
    loss.append(loss_0)
    com_bits = [1]
    bits = 1
    comp_method = compression_dic[arg.comp_method]
    com_round_bit = compute_bit(dim, arg)
    k = 0
    while k < arg.T and loss[-1] - f_opt > tol:
        k += 1
        xk = arg.theta_1 * zk + arg.theta_2 * wk + (1 - arg.theta_1 - arg.theta_2) * yk
        for i in range(arg.node):
            # Each node compresses the difference between its local gradient
            # and its gradient-shift hs[i], then updates the shift.
            local_gradx[i] = grad(X[i * num_data_worker:(i + 1) * num_data_worker],
                                  y[i * num_data_worker:(i + 1) * num_data_worker], xk, arg)
            deltas[i] = comp_method(local_gradx[i] - hs[i], arg)
            local_gradw[i] = grad(X[i * num_data_worker:(i + 1) * num_data_worker],
                                  y[i * num_data_worker:(i + 1) * num_data_worker], wk, arg)
            deltasw[i] = comp_method(local_gradw[i] - hs[i], arg)
            hs[i] += arg.alpha * deltasw[i]
        gk = np.mean(deltas, axis=0) + hs_mean
        assert gk.shape[0] == len(w)
        hs_mean += arg.alpha * np.mean(deltasw, axis=0)
        assert hs_mean.shape[0] == len(w)
        oldyk = yk
        yk = xk - arg.eta * gk
        zk = arg.beta * zk + (1 - arg.beta) * xk + (arg.gamma / arg.eta) * (yk - xk)
        # With probability arg.prob, refresh the anchor point wk.
        if bernoulli.rvs(arg.prob):
            wk = oldyk
        bits += com_round_bit
        loss_k = loss_logistic(X, y, yk, arg)
        loss.append(loss_k)
        com_bits.append(bits)
        if verbose and k % 1000 == 0:
            print('at iteration', k + 1, ' loss =', loss_k)
    return np.array(loss), np.array(com_bits)
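# Both DIANA variants only assume that comp_method(v, arg) is an unbiased
# compressor with variance parameter omega. A minimal sketch of one such
# compressor, random (rand-k) sparsification; `arg.k` is a hypothetical field:
import numpy as np

def random_sparsification(v, arg):
    d = v.shape[0]
    # Keep k coordinates chosen uniformly at random, rescaled by d/k so that
    # E[C(v)] = v (unbiasedness), with omega = d/k - 1.
    mask = np.zeros(d)
    idx = np.random.choice(d, size=arg.k, replace=False)
    mask[idx] = 1.0
    return (d / arg.k) * mask * v

class _Args:  # hypothetical stand-in for the `arg` object
    k = 2

print(random_sparsification(np.arange(6, dtype=float), _Args()))
# two surviving coordinates, each scaled by 6/2 = 3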
import numpy as np

def diana(X, y, w, arg, f_opt, tol=1e-15, verbose=True):
    '''
    -------------------------
    Implementation of DIANA method
    -------------------------
    X - data matrix
    y - labels vector
    w - initial point
    arg - class containing all parameters of the method and compressor
    f_opt - optimal function value
    tol - desired tolerance of the solution
    verbose - if True, the function value at each iteration is printed

    return:
    loss - numpy array containing the function value at each iteration
    com_bits - numpy array containing the bits transmitted by one node to the server
    '''
    alg = 'DIANA'
    dim = X.shape[1]
    omega = compute_omega(dim, arg)
    arg.alpha = 1 / (1 + omega)
    # Step size from the DIANA analysis.
    arg.eta = min(arg.alpha / (2 * arg.lamda),
                  2 / ((arg.L + arg.lamda) * (1 + 6 * omega / arg.node)))
    if verbose:
        print('algorithm ' + alg + ' starts')
        print('eta = ', arg.eta, 'compression: ', arg.comp_method)
        print('f_opt = ', f_opt)
    num_data = y.shape[0]
    num_data_worker = int(np.floor(num_data / arg.node))
    loss = []
    local_grad = np.zeros((arg.node, dim))
    hs = np.zeros((arg.node, dim))
    hs_mean = np.mean(hs, axis=0)
    deltas = np.zeros((arg.node, dim))
    loss_0 = loss_logistic(X, y, w, arg)
    if verbose:
        print('at iteration 0', 'loss =', loss_0)
    loss.append(loss_0)
    com_bits = [1]
    bits = 1
    comp_method = compression_dic[arg.comp_method]
    com_round_bit = compute_bit(dim, arg)
    k = 0
    while k < arg.T and loss[-1] - f_opt > tol:
        k += 1
        for i in range(arg.node):
            # Each node compresses the difference between its local gradient
            # and its gradient-shift hs[i], then updates the shift.
            local_grad[i] = grad(X[i * num_data_worker:(i + 1) * num_data_worker],
                                 y[i * num_data_worker:(i + 1) * num_data_worker], w, arg)
            deltas[i] = comp_method(local_grad[i] - hs[i], arg)
            hs[i] += arg.alpha * deltas[i]
        gk = np.mean(deltas, axis=0) + hs_mean
        assert gk.shape[0] == len(w)
        hs_mean += arg.alpha * np.mean(deltas, axis=0)
        assert hs_mean.shape[0] == len(w)
        w = w - arg.eta * gk
        bits += com_round_bit
        loss_k = loss_logistic(X, y, w, arg)
        loss.append(loss_k)
        com_bits.append(bits)
        if verbose and k % 1000 == 0:
            print('at iteration', k + 1, ' loss =', loss_k)
    return np.array(loss), np.array(com_bits)
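# Both methods call grad(X, y, w, arg) for the local gradient. A minimal
# sketch consistent with loss_logistic being an l2-regularized logistic loss,
# assuming labels y in {-1, +1} and arg.lamda as the regularization weight:
import numpy as np

def grad(X, y, w, arg):
    # gradient of (1/n) * sum_i log(1 + exp(-y_i * x_i^T w)) + (lamda/2) ||w||^2
    n = y.shape[0]
    margins = y * (X @ w)
    coeff = -y / (1.0 + np.exp(margins))
    return (X.T @ coeff) / n + arg.lamda * w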
def call_symplectic_shift(self, x):
    def f(x):
        return self.forward(x)
    # Gradient of the forward map with respect to the input x.
    return utils.grad(f, [x])[0]
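# call_symplectic_shift differentiates the network's forward map with respect
# to its input. A sketch of what a utils.grad with this signature might look
# like in PyTorch (an assumption; the project's framework is not shown here),
# assuming self.forward returns a scalar:
import torch

def grad_wrt_inputs(f, inputs):
    inputs = [x.requires_grad_(True) for x in inputs]
    out = f(*inputs)
    # returns one gradient tensor per input, like utils.grad(f, [x])
    return torch.autograd.grad(out, inputs)

x = torch.tensor([1.0, 2.0])
print(grad_wrt_inputs(lambda x: (x ** 2).sum(), [x])[0])  # tensor([2., 4.])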
def log_p(self, reward):
    # Laplace approximation to the log-likelihood of the controls u:
    # 0.5 * g^T H^{-1} g + 0.5 * log|det(-H)|, with g and H the gradient and
    # Hessian of the total reward with respect to u (tt/tn are theano.tensor
    # and theano.tensor.nlinalg).
    r = self.total(reward)
    g = utils.grad(r, self.u)
    H = utils.jacobian(g, self.u)
    return 0.5 * tt.dot(g, tt.dot(tn.matrix_inverse(H), g)) + \
        0.5 * tt.log(abs(tn.det(-H)))
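# Where that form comes from, sketched here as a sanity check: expand the
# total reward to second order around the observed controls u,
#     r(u') ~= r(u) + g^T (u' - u) + 0.5 (u' - u)^T H (u' - u),
# and normalize exp(r) as a Gaussian over u'. Up to terms constant in the
# reward parameters this gives
#     log p(u) ~= 0.5 * g^T H^{-1} g + 0.5 * log det(-H),
# i.e. the expression returned above (abs() guards the sign of the det).
# The log-det term matches the Gaussian normalizer, dropping 2*pi factors:
import numpy as np

H = np.array([[-2.0, 0.0], [0.0, -0.5]])  # made-up negative-definite Hessian
print(0.5 * np.log(np.linalg.det(-H)))    # 0.5 * log(1.0) = 0.0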