def _solve(self, baseline_start=None, kernel_start=None):
    """Run the nonparametric estimation loop.

    The fitted parameters are stored in the attributes `kernel` and
    `baseline`.

    Parameters
    ----------
    baseline_start : `None` or `np.ndarray`, shape=(n_nodes), default=None
        Used to force start values for mu parameter
        If `None` starts with uniform 1 values

    kernel_start : `None` or `np.ndarray`, shape=(n_nodes, n_nodes, kernel_size), default=None
        Used to force start values for kernel parameter
        If `None` starts with random values
    """
    kernel_shape = (self.n_nodes, self.n_nodes, self.kernel_size)
    if kernel_start is None:
        self.kernel = 0.1 * np.random.uniform(size=kernel_shape)
    elif kernel_start.shape != kernel_shape:
        raise ValueError('kernel_start has shape {} but should have '
                         'shape {}'.format(kernel_start.shape,
                                           kernel_shape))
    else:
        self.kernel = kernel_start.copy()

    if baseline_start is None:
        self.baseline = np.ones(self.n_nodes)
    else:
        self.baseline = baseline_start.copy()

    # 2d view on the kernel array: writes made by the wrapped solver
    # through this view are reflected in ``self.kernel``
    kernel_2d = self.kernel.reshape(
        (self.n_nodes, self.n_nodes * self.kernel_size))

    for n_iter in range(self.max_iter + 1):
        baseline_before = self.baseline.copy()
        kernel_before = self.kernel.copy()

        self._learner.solve(self.baseline, kernel_2d)

        rel_baseline = relative_distance(self.baseline, baseline_before)
        rel_kernel = relative_distance(self.kernel, kernel_before)

        converged = max(rel_baseline, rel_kernel) <= self.tol
        # History is always recorded on the last iteration and at
        # convergence
        force_print = converged or n_iter == self.max_iter
        self._handle_history(n_iter, rel_baseline=rel_baseline,
                             rel_kernel=rel_kernel, force=force_print)
        if converged:
            break
def _solve(self, x0: np.ndarray = None, step: float = None):
    """Run the gradient loop and return the reached minimizer.

    Parameters
    ----------
    x0 : `np.ndarray`, default=None
        Starting iterate; solver defaults are used when `None`

    step : `float`, default=None
        Initial step size; solver defaults are used when `None`

    Returns
    -------
    output : `np.ndarray`
        The iterate reached when the solver stopped
    """
    x, prev_x, y, grad_y, t, step, obj = \
        self._initialize_values(x0, step)

    for n_iter in range(self.max_iter + 1):
        t_before = t
        obj_before = obj
        prev_x[:] = x

        x, y, t, step = self._gradient_step(x, prev_x, y, grad_y, t,
                                            t_before, step)
        if step == 0:
            # No admissible step could be found: stop here
            print('Step equals 0... at %i' % n_iter)
            break

        obj = self.objective(x)
        rel_delta = relative_distance(x, prev_x)
        rel_obj = abs(obj - obj_before) / abs(obj_before)
        converged = rel_obj < self.tol

        # If converged, we stop the loop and record the last step
        # in history
        self._handle_history(n_iter, force=converged, obj=obj,
                             x=x.copy(), rel_delta=rel_delta, step=step,
                             rel_obj=rel_obj)
        if converged:
            break

    self._set("solution", x)
    return x
def _solve(self, x0: np.ndarray = None, step: float = None):
    """Run the solver loop and return the reached minimizer.

    Parameters
    ----------
    x0 : `np.ndarray`, default=None
        Starting iterate; solver defaults are used when `None`

    step : `float`, default=None
        Initial step size; solver defaults are used when `None`

    Returns
    -------
    output : `np.ndarray`
        The iterate reached when the solver stopped
    """
    step, obj, x, prev_x, prev_grad_x_ssc, grad_x_ssc = \
        self._initialize_values(x0, step=step)
    if self.modified:
        # Buffer for the gradient at the auxiliary point y, used only
        # by the modified scheme below
        grad_y_ssc = np.empty_like(x)
    if self.step is None:
        # No user-provided step: start from a large value and let the
        # line search shrink it to an admissible one
        self.step = 1e5
        step = self._perform_line_search(x, x.copy(), self.step)
    # l_k is the inverse step, updated by each gradient step
    l_k = 1. / step
    for n_iter in range(self.max_iter + 1):
        prev_obj = obj
        x, y, alpha_k, beta_k, lambda_k, l_k = \
            self._gradient_step(x, prev_x, grad_x_ssc, prev_grad_x_ssc,
                                n_iter, l_k)
        if self.modified:
            # Modified scheme: evaluate both the auxiliary point y and
            # the iterate x, and keep whichever has the smaller ssc
            # objective (copying gradient and loss along with it)
            llh_y_ssc, _ = self.model_ssc.loss_and_grad(y, out=grad_y_ssc)
            llh_x_ssc, _ = self.model_ssc.loss_and_grad(x, out=grad_x_ssc)
            if self._objective_ssc(y, loss_ssc=llh_y_ssc) < \
                    self._objective_ssc(x, loss_ssc=llh_x_ssc):
                x[:] = y
                grad_x_ssc[:] = grad_y_ssc
                llh_x_ssc = llh_y_ssc
        else:
            llh_x_ssc, _ = self.model_ssc.loss_and_grad(x, out=grad_x_ssc)
        rel_delta = relative_distance(x, prev_x)
        # Map the ssc loss back to the original loss scale — presumably
        # "ssc" is a smoothed surrogate of the model; confirm in
        # model_ssc's definition
        llh_x = self.model_ssc.original_loss(llh_x_ssc)
        obj = self.objective(x, loss=llh_x)
        rel_obj = abs(obj - prev_obj) / abs(prev_obj)
        obj_gain = prev_obj - obj
        converged = rel_obj < self.tol
        # if converged, we stop the loop and record the last step in history
        self._handle_history(n_iter, force=converged, obj=obj, x=x.copy(),
                             rel_delta=rel_delta, step=alpha_k,
                             rel_obj=rel_obj, l_k=l_k, beta_k=beta_k,
                             lambda_k=lambda_k, th_gain=self._th_gain,
                             obj_gain=obj_gain)
        if converged:
            break
    self._set("solution", x)
    return x
def insp(xk):
    """Per-iteration inspection callback recording solver history.

    Relies on enclosing-scope state: ``prev_x`` (previous iterate,
    updated in place), and ``prev_obj`` / ``n_iter`` which are
    one-element containers used as mutable cells because the closure
    cannot rebind outer names. Presumably passed as a callback to an
    external optimizer — confirm at the call site.

    Parameters
    ----------
    xk : `np.ndarray`
        Current iterate supplied by the optimizer.
    """
    x = xk
    rel_delta = relative_distance(x, prev_x)
    prev_x[:] = x
    obj = self.objective(x)
    rel_obj = abs(obj - prev_obj[0]) / abs(prev_obj[0])
    prev_obj[0] = obj
    self._handle_history(n_iter[0], force=False, obj=obj, x=xk.copy(),
                         rel_delta=rel_delta, rel_obj=rel_obj)
    n_iter[0] += 1
def _solve(self, x0: np.array = None, step: float = None):
    """Launch the solver

    Parameters
    ----------
    x0 : np.array, shape=(n_coeffs,)
        Starting iterate for the solver

    step : float
        Step-size or learning rate for the solver

    Returns
    -------
    output : np.array, shape=(n_coeffs,)
        Obtained minimizer
    """
    if step is not None:
        self.step = step

    step, obj, minimizer, prev_minimizer = \
        self._initialize_values(x0, step, n_empty_vectors=1)
    self._solver.set_starting_iterate(minimizer)

    # Each iteration delegates one full epoch to the wrapped C++
    # solver, which may adjust its internal step on its own
    for epoch in range(self.max_iter + 1):
        prev_minimizer[:] = minimizer
        obj_before = obj

        self._solver.solve()
        self._solver.get_minimizer(minimizer)

        obj = self.objective(minimizer)
        rel_delta = relative_distance(minimizer, prev_minimizer)
        rel_obj = abs(obj - obj_before) / abs(obj_before)
        converged = rel_obj < self.tol

        # If converged, we stop the loop and record the last epoch
        # in history
        self._handle_history(epoch, force=converged, obj=obj,
                             x=minimizer.copy(), rel_delta=rel_delta,
                             rel_obj=rel_obj)
        if converged:
            break

    self._set("solution", minimizer)
    return minimizer
def _solve(self, x0: np.ndarray, step: float):
    """Run the proximal splitting loop and return the minimizer.

    Each iteration performs one prox call per penalty (with an
    over-relaxation step) and averages the resulting points.

    Parameters
    ----------
    x0 : `np.ndarray`
        Starting iterate for the solver

    step : `float`
        Step-size used in the prox calls

    Returns
    -------
    output : `np.ndarray`
        The iterate reached when the solver stopped
    """
    x, x_old, z_list, z_old_list, obj, step = \
        self.initialize_values(x0, step)
    n_prox = self.prox.n_proxs
    for n_iter in range(self.max_iter + 1):
        obj_old = obj
        grad_x = self.model.grad(x)
        for i in range(n_prox):
            z = z_list[i]
            z_old = z_old_list[i]
            z[:] = self.prox.call_i(i, 2 * x_old - z_old - step * grad_x,
                                    n_prox * step)
            # Relaxation step
            z[:] = z_old + self.surrelax * (z - x_old)
        # The new iterate is the average of the per-prox points
        x[:] = 1. / n_prox * sum(z_list)
        rel_delta = relative_distance(x, x_old)
        obj = self.objective(x)
        rel_obj = abs(obj - obj_old) / abs(obj_old)
        x_old[:] = x
        for i in range(n_prox):
            z_old_list[i][:] = z_list[i]
        converged = rel_obj < self.tol
        # if converged, we stop the loop and record the last step
        # in history
        self._handle_history(n_iter, force=converged, obj=obj, x=x.copy(),
                             rel_delta=rel_delta, step=step,
                             rel_obj=rel_obj)
        if converged:
            break
    self._set('solution', x)
    # Return the minimizer, for consistency with the sibling solvers
    # (previously this method set 'solution' but returned None)
    return x
def _solve(self, baseline_start=None, amplitudes_start=None):
    """Fit baseline and amplitudes with the iterative EM-like scheme.

    Parameters
    ----------
    baseline_start : `None` or `np.ndarray`, shape=(n_nodes)
        Set initial value of baseline parameter
        If `None` starts with uniform 1 values

    amplitudes_start : `None` or `np.ndarray`, shape=(n_nodes, n_nodes, n_gaussians)
        Set initial value of adjacency parameter
        If `None` starts with random values uniformly sampled
        between 0.5 and 0.9
    """
    if baseline_start is None:
        baseline_start = np.ones(self.n_nodes)
    self._set('baseline', baseline_start.copy())
    if amplitudes_start is None:
        amplitudes_start = np.random.uniform(
            0.5, 0.9, (self.n_nodes, self.n_nodes, self.n_gaussians))
    else:
        if amplitudes_start.shape != (
                self.n_nodes, self.n_nodes, self.n_gaussians):
            raise ValueError(
                'amplitudes_start has shape {} but should have '
                'shape {}'.format(
                    amplitudes_start.shape,
                    (self.n_nodes, self.n_nodes, self.n_gaussians)
                ))
    self._set('amplitudes', amplitudes_start.copy())
    # 2d view on the amplitudes array shared with the wrapped solver:
    # writes through it update ``self.amplitudes``
    _amplitudes_2d = self.amplitudes.reshape(
        (self.n_nodes, self.n_nodes * self.n_gaussians))
    # Seed value, refined each iteration and used to derive inner_tol
    # when em_tol is not set
    max_relative_distance = 1e-1
    for i in range(self.max_iter + 1):
        prev_baseline = self.baseline.copy()
        prev_amplitudes = self.amplitudes.copy()
        # NOTE(review): these duplicate prev_* exactly (there is no
        # inner loop here), so inner_rel_* below equals rel_* — looks
        # like a leftover from a variant with an inner EM loop; confirm
        inner_prev_baseline = self.baseline.copy()
        inner_prev_amplitudes = self.amplitudes.copy()
        self._learner.solve(self.baseline, _amplitudes_2d)
        inner_rel_baseline = relative_distance(self.baseline,
                                               inner_prev_baseline)
        inner_rel_adjacency = relative_distance(self.amplitudes,
                                                inner_prev_amplitudes)
        if self.em_tol is None:
            inner_tol = max_relative_distance * 1e-2
        else:
            inner_tol = self.em_tol
        # NOTE(review): this break exits the whole loop without
        # recording the final step in history — confirm intended
        if max(inner_rel_baseline, inner_rel_adjacency) < inner_tol:
            break
        rel_baseline = relative_distance(self.baseline, prev_baseline)
        rel_amplitudes = relative_distance(self.amplitudes,
                                           prev_amplitudes)
        max_relative_distance = max(rel_baseline, rel_amplitudes)
        # We perform at least 5 iterations as at start we sometimes reach a
        # low tolerance if inner_tol is too low
        converged = max_relative_distance <= self.tol and i > 5
        force_print = (i == self.max_iter) or converged
        self._handle_history(i, rel_baseline=rel_baseline,
                             rel_amplitudes=rel_amplitudes,
                             force=force_print)
        if converged:
            break
def _solve(self, baseline_start=None, adjacency_start=None):
    """Fit baseline and adjacency with an ADMM-like scheme.

    Parameters
    ----------
    baseline_start : `None` or `np.ndarray`, shape=(n_nodes)
        Set initial value of baseline parameter
        If `None` starts with uniform 1 values

    adjacency_start : `None` or `np.ndarray`, shape=(n_nodes, n_nodes)
        Set initial value of adjacency parameter
        If `None` starts with random values uniformly sampled
        between 0.5 and 0.9
    """
    if baseline_start is None:
        baseline_start = np.ones(self.n_nodes)
    self._set('baseline', baseline_start.copy())
    if adjacency_start is None:
        adjacency_start = np.random.uniform(0.5, 0.9,
                                            (self.n_nodes, self.n_nodes))
    self._set('adjacency', adjacency_start.copy())
    # z1/z2 are the split variables for the nuclear and l1 penalties,
    # u1/u2 the corresponding (scaled) dual variables
    z1 = np.zeros_like(self.adjacency)
    z2 = np.zeros_like(self.adjacency)
    u1 = np.zeros_like(self.adjacency)
    u2 = np.zeros_like(self.adjacency)
    if self.rho <= 0:
        raise ValueError("The parameter rho equals {}, while it should "
                         "be strictly positive.".format(self.rho))
    objective = self.objective(self.coeffs)
    # Seed value, refined each iteration and used to derive inner_tol
    # when em_tol is not set
    max_relative_distance = 1e-1
    for i in range(self.max_iter + 1):
        prev_objective = objective
        prev_baseline = self.baseline.copy()
        prev_adjacency = self.adjacency.copy()
        # Inner EM loop: iterate the wrapped solver until its updates
        # become small relative to inner_tol (or em_max_iter is hit)
        for _ in range(self.em_max_iter):
            inner_prev_baseline = self.baseline.copy()
            inner_prev_adjacency = self.adjacency.copy()
            self._learner.solve(self.baseline, self.adjacency, z1, z2,
                                u1, u2)
            inner_rel_baseline = relative_distance(self.baseline,
                                                   inner_prev_baseline)
            inner_rel_adjacency = relative_distance(
                self.adjacency, inner_prev_adjacency)
            if self.em_tol is None:
                inner_tol = max_relative_distance * 1e-2
            else:
                inner_tol = self.em_tol
            if max(inner_rel_baseline, inner_rel_adjacency) < inner_tol:
                break
        # Prox updates rebind z1/z2 to fresh arrays; the next inner
        # loop sees the new bindings
        z1 = self._prox_nuclear.call(np.ravel(self.adjacency + u1),
                                     step=1. / self.rho) \
            .reshape(self.n_nodes, self.n_nodes)
        z2 = self._prox_l1.call(np.ravel(self.adjacency + u2),
                                step=1. / self.rho) \
            .reshape(self.n_nodes, self.n_nodes)
        # Dual updates, in place
        u1 += self.adjacency - z1
        u2 += self.adjacency - z2
        objective = self.objective(self.coeffs)
        rel_obj = abs(objective - prev_objective) / abs(prev_objective)
        rel_baseline = relative_distance(self.baseline, prev_baseline)
        rel_adjacency = relative_distance(self.adjacency, prev_adjacency)
        max_relative_distance = max(rel_baseline, rel_adjacency)
        # We perform at least 5 iterations as at start we sometimes reach a
        # low tolerance if inner_tol is too low
        converged = max_relative_distance <= self.tol and i > 5
        force_print = (i == self.max_iter) or converged
        self._handle_history(i, obj=objective, rel_obj=rel_obj,
                             rel_baseline=rel_baseline,
                             rel_adjacency=rel_adjacency,
                             force=force_print)
        if converged:
            break
def _solve(self, baseline_start=None, amplitudes_start=None,
           basis_kernels_start=None):
    """Perform nonparametric estimation

    Parameters
    ----------
    baseline_start : `None` or `np.ndarray`, shape=(n_nodes)
        Used to force start values for baseline attribute
        If `None` starts with uniform 1 values

    amplitudes_start : `None` or `np.ndarray`, shape=(n_nodes,n_nodes,D)
        Used to force start values for amplitude parameter
        If `None` starts with random values uniformly sampled
        between 0.5 and 0.9

    basis_kernels_start : `None` or `np.ndarray`, shape=(D,kernel_size)
        Used to force start values for the basis kernels
        If `None` starts with random values uniformly sampled
        between 0 and 0.1
    """
    if baseline_start is not None:
        self._set("baseline", baseline_start.copy())
    else:
        self._set("baseline", np.ones(self.n_nodes))

    if amplitudes_start is not None:
        self._set("amplitudes", amplitudes_start.copy())
    else:
        self._set("amplitudes",
                  np.random.uniform(0.5, 0.9,
                                    size=(self.n_nodes, self.n_nodes,
                                          self.n_basis)))

    if basis_kernels_start is not None:
        self._set("basis_kernels", basis_kernels_start.copy())
    else:
        self._set("basis_kernels",
                  0.1 * np.random.uniform(size=(self.n_basis,
                                                self.kernel_size)))

    # 2d view on the amplitudes array shared with the wrapped solver:
    # writes through it update ``self.amplitudes``
    self._set('_amplitudes_2d', self.amplitudes.reshape(
        (self.n_nodes, self.n_nodes * self.n_basis)))

    for n_iter in range(self.max_iter + 1):
        baseline_before = self.baseline.copy()
        amplitudes_before = self.amplitudes.copy()
        basis_kernels_before = self.basis_kernels.copy()

        rel_ode = self._learner.solve(self.baseline, self.basis_kernels,
                                      self._amplitudes_2d,
                                      self.ode_max_iter, self.ode_tol)

        rel_baseline = relative_distance(self.baseline, baseline_before)
        rel_amplitudes = relative_distance(self.amplitudes,
                                           amplitudes_before)
        rel_basis_kernels = relative_distance(self.basis_kernels,
                                              basis_kernels_before)

        converged = max(rel_baseline, rel_amplitudes,
                        rel_basis_kernels) <= self.tol
        # History is always recorded on the last iteration and at
        # convergence
        force_print = converged or n_iter == self.max_iter
        self._handle_history(n_iter, rel_baseline=rel_baseline,
                             rel_amplitudes=rel_amplitudes,
                             rel_basis_kernels=rel_basis_kernels,
                             rel_ode=rel_ode, force=force_print)
        if converged:
            break