def generate_text(n_atoms=5, text_length=3000, n_spaces=3, random_state=None):
    """Generate a text image with text_length letters chosen among n_atoms.

    Parameters
    ----------
    n_atoms: int (default: 5)
        Number of letters used to generate the text. This should not be above
        26 as only lower-case ascii letters are used here. It is not used
        when ``random_state == 'PAMI'``.
    text_length: int (default: 3000)
        Number of character that compose the text image. This also account for
        white space characters.
    n_spaces: int (default: 3)
        Number of whitespace entries appended to the alphabet the text is
        drawn from. A larger value makes white spaces more frequent.
    random_state : int, RandomState instance, None (default) or 'PAMI'
        Seed for the random number generator. Use an int to make the
        randomness deterministic. The special value ``'PAMI'`` uses the
        letters ``P, A, M, I`` with a generator seeded with 0.

    Returns
    -------
    X: ndarray, shape (height, width)
        Image composed of a text of `text_length` characters drawn uniformly
        among the selected letters and `n_spaces` whitespaces.
    D: ndarray, shape (n_atoms, *atom_support)
        Images of the characters used to generate the image `X`.
    """
    if random_state == 'PAMI':
        rng = check_random_state(0)
        D_char = np.array(list('PAMI' + ' ' * n_spaces))
    else:
        rng = check_random_state(random_state)
        chars = list(string.ascii_lowercase)
        D_char = np.r_[rng.choice(chars, replace=False, size=n_atoms),
                       [' '] * n_spaces]

    text_char_idx = rng.choice(len(D_char), replace=True, size=text_length)
    text = ''.join([D_char[i] for i in text_char_idx])
    X = convert_str_to_png(text, margin=0)

    # The letters are stored before the whitespaces. Use an explicit stop
    # index: ``D_char[:-n_spaces]`` is empty when ``n_spaces == 0``.
    n_letters = len(D_char) - n_spaces
    D = [convert_str_to_png(D_k, margin=0) for D_k in D_char[:n_letters]]

    # Reshape all atoms to the same shape
    D_reshaped = []
    atom_shape = np.array([dk.shape for dk in D]).max(axis=0)
    for dk in D:
        padding = get_centered_padding(dk.shape, atom_shape)
        D_reshaped.append(np.pad(dk, padding))
    D = np.array(D_reshaped)

    D = prox_d(D)

    print(f"{text_length} - image shape: {X.shape}, pattern shape: {D.shape}")
    return X, D
def test_warm_start(valid_support, atom_support, reg):
    """Check that `dicod` correctly uses the warm start value `z0`.

    First, with ``reg=0`` and a signal built exactly from ``z``, starting at
    ``z0=z`` must return ``z`` unchanged. Then, on a random signal, the
    solution reached from the warm start must be such that no coordinate
    update is larger than `tol` in magnitude.
    """
    tol = 1
    n_atoms = 7
    n_channels = 5
    random_state = 36

    rng = check_random_state(random_state)

    D = rng.randn(n_atoms, n_channels, *atom_support)
    D /= np.sqrt(np.sum(D * D, axis=(1, 2), keepdims=True))
    z = rng.randn(n_atoms, *valid_support)
    z *= (rng.rand(n_atoms, *valid_support) > .7)

    X = reconstruct(z, D)
    z_hat, *_ = dicod(X, D, reg=0, z0=z, tol=tol, n_workers=N_WORKERS,
                      max_iter=10000, verbose=VERBOSE)
    assert np.allclose(z_hat, z)

    X = rng.randn(*X.shape)
    z_hat, *_ = dicod(X, D, reg, z0=z, tol=tol, n_workers=N_WORKERS,
                      max_iter=100000, verbose=VERBOSE)
    _, dz_opt, _ = _init_beta(X, D, reg, z_i=z_hat)
    # Bound the magnitude of the updates: a signed comparison would accept
    # arbitrarily large negative values of dz_opt.
    assert np.all(abs(dz_opt) <= tol)
def test_ztz(valid_shape, atom_shape):
    """Compare the ztz and ztX returned by `dicod` with direct computations."""
    n_atoms, n_channels = 7, 5
    tol, reg = .5, .1
    random_state = None

    # Full signal size along each axis: valid size + atom size - 1.
    sig_shape = tuple(
        n_valid + n_atom - 1
        for n_atom, n_valid in zip(atom_shape, valid_shape)
    )

    rng = check_random_state(random_state)
    X = rng.randn(n_channels, *sig_shape)
    D = rng.randn(n_atoms, n_channels, *atom_shape)
    # Rescale D by the root sum of squares taken over axes 1 and 2.
    scale = np.sqrt(np.sum(D * D, axis=(1, 2), keepdims=True))
    D /= scale

    z_hat, ztz, ztX, *_ = dicod(
        X, D, reg, tol=tol, n_jobs=N_WORKERS, return_ztz=True,
        verbose=VERBOSE
    )

    expected_ztz = compute_ztz(z_hat, atom_shape)
    assert np.allclose(expected_ztz, ztz)

    expected_ztX = compute_ztX(z_hat, X)
    assert np.allclose(expected_ztX, ztX)
def test_freeze_support(valid_support, atom_support):
    """Check that `freeze_support=True` keeps the zeros of `z0` at zero."""
    n_atoms, n_channels = 7, 5
    tol, reg = .5, 0
    random_state = None

    sig_support = get_full_support(valid_support, atom_support)
    rng = check_random_state(random_state)

    D = rng.randn(n_atoms, n_channels, *atom_support)
    # Rescale D by the root sum of squares taken over axes 1 and 2.
    D /= np.sqrt(np.sum(D * D, axis=(1, 2), keepdims=True))

    # Random activations with a random support.
    z = rng.randn(n_atoms, *valid_support)
    z *= rng.rand(n_atoms, *valid_support) > .5

    X = rng.randn(n_channels, *sig_support)

    # Options shared by the two calls to the solver.
    solver_kwargs = dict(tol=tol, n_workers=N_WORKERS, max_iter=1000,
                         freeze_support=True, verbose=VERBOSE)

    # With an all-zero warm start, nothing is allowed to become active.
    z_hat, *_ = dicod(X, D, reg, z0=0 * z, **solver_kwargs)
    assert np.all(z_hat == 0)

    # With warm start z, the coefficients that are zero in z stay zero.
    z_hat, *_ = dicod(X, D, reg, z0=z, **solver_kwargs)
    assert np.all(z_hat[z == 0] == 0)
def compare_strategies(strategies, n_rep=10, n_workers=4, timeout=7200,
                       list_n_times=[150, 750], list_reg=[1e-1, 5e-1],
                       random_state=None):
    '''Run the coordinate selection strategies on simulated problems and
    store the results in a pickle file.

    Every combination of strategy, regularization, problem size and seed is
    run with `run_one`. The collected results are stored as a DataFrame in
    the file ``SAVE_FILE_NAME.format('.pkl')``.

    Parameters
    ----------
    strategies: list
        Strategies to run the benchmark for. Only the first element ``s[0]``
        of each entry is forwarded to `run_one`.
    n_rep: int (default: 10)
        Number of repetition for each strategy to average.
    n_workers: int (default: 4)
        Number of jobs to run strategies in parallel.
    timeout: int (default: 7200)
        maximal runtime for each strategy. The default timeout is 2 hours.
    list_n_times: list of int
        Size of the generated problems. The default list is never modified.
    list_reg: list of float
        Regularization parameter of the considered problem. The default list
        is never modified.
    random_state: None or int or RandomState
        Seed for the random number generator.
    '''
    rng = check_random_state(random_state)

    # Parameters to generate the simulated problems
    n_times_atom = 250
    n_atoms = 25
    n_channels = 7
    noise_level = 1

    # Parameters for the algorithm
    tol = 1e-8
    dicod_args = dict(timing=False, timeout=timeout, max_iter=int(5e8),
                      verbose=2)

    # Get the list of parameter to call
    list_seeds = [rng.randint(MAX_INT) for _ in range(n_rep)]
    strategies = [s[0] for s in strategies]
    list_args = itertools.product(strategies, list_reg, list_n_times,
                                  list_seeds)

    # Run the computation. The keyword of joblib.Parallel for the number of
    # parallel jobs is `n_jobs`; `n_workers` is not an accepted argument.
    results = Parallel(n_jobs=n_workers)(
        delayed(run_one)(n_times, n_times_atom, n_atoms, n_channels,
                         noise_level, seed, reg, tol, strategy, dicod_args)
        for strategy, reg, n_times, seed in list_args)

    # Save the results as a DataFrame
    results = pandas.DataFrame(results)
    results.to_pickle(SAVE_FILE_NAME.format('.pkl'))
def run_scaling_grid(n_rep=1, max_workers=225, random_state=None):
    '''Run DICOD with different n_workers on a grid and on a line.

    The results of all the runs are collected in a DataFrame and pickled to
    the file given by ``get_save_file_name(ext='pkl')``.
    '''
    # Simulated problem
    n_atoms = 5
    atom_support = (8, 8)
    rng = check_random_state(random_state)

    # Solver configuration
    tol = 1e-4
    dicod_args = {'z_positive': False, 'timeout': None,
                  'max_iter': int(1e9), 'verbose': 1}

    # Values explored by the benchmark
    reg_list = [5e-1, 2e-1, 1e-1]
    list_soft_lock = ['border']  # , 'corner']
    list_grid = [True, False]
    list_n_workers = np.unique(
        np.logspace(0, np.log10(15), 20, dtype=int)) ** 2
    list_random_states = enumerate(rng.randint(MAX_INT, size=n_rep))

    # HACK
    # list_grid = [False]
    # list_n_workers = [25]

    # Without the grid layout, drop the configurations with more than 36
    # workers and run the remaining ones with 32 workers or more with
    # exactly 32 workers.
    it_args = []
    for args in itertools.product(list_n_workers, reg_list, list_grid,
                                  list_soft_lock, list_random_states):
        n_workers_, use_grid = args[0], args[2]
        if not use_grid and n_workers_ > 36:
            continue
        if not use_grid and n_workers_ >= 32:
            args = (32, *args[1:])
        it_args.append(args)

    # run the benchmark
    run_one = delayed(run_one_grid)
    executor = ParallelResourceBalance(max_workers=max_workers)
    results = executor(
        run_one(n_atoms=n_atoms, atom_support=atom_support, reg=reg,
                n_workers=n_workers, grid=grid, tol=tol,
                soft_lock=soft_lock, dicod_args=dicod_args,
                random_state=random_state)
        for (n_workers, reg, grid, soft_lock, random_state) in it_args)

    # Save the results as a DataFrame
    results = pandas.DataFrame(results)
    results.to_pickle(get_save_file_name(ext='pkl'))
def test_distributed_sparse_encoder():
    """Check the cost and sufficient statistics of DistributedSparseEncoder.

    Also check that updating D on the workers raises a ValueError when the
    workers have been initialized with a pre-computed DtD.
    """
    rng = check_random_state(42)

    n_atoms, n_channels = 10, 3
    n_times_atom = 10
    n_times = 10 * n_times_atom
    reg = 5e-1

    params = {
        'tol': 1e-2, 'n_seg': 'auto', 'timing': False, 'timeout': None,
        'verbose': 100, 'strategy': 'greedy', 'max_iter': 100000,
        'soft_lock': 'border', 'z_positive': True, 'return_ztz': False,
        'freeze_support': False, 'warm_start': False, 'random_state': 27,
    }

    X = rng.randn(n_channels, n_times)
    D = rng.randn(n_atoms, n_channels, n_times_atom)
    # Rescale each atom to unit l2-norm (sum over all the non-atom axes).
    atom_axes = tuple(range(1, D.ndim))
    atom_norms = np.sqrt(np.sum(D * D, axis=atom_axes, keepdims=True))
    D /= atom_norms

    DtD = compute_DtD(D)
    encoder = DistributedSparseEncoder(n_workers=2)
    encoder.init_workers(X, D, reg, params, DtD=DtD)

    with pytest.raises(ValueError, match=r"pre-computed value DtD"):
        encoder.set_worker_D(D)

    encoder.process_z_hat()
    z_hat = encoder.get_z_hat()

    # The distributed cost should match the one computed from z_hat.
    cost_distrib = encoder.get_cost()
    cost_local = compute_objective(X, z_hat, D, reg)
    assert np.allclose(cost_local, cost_distrib)

    # Same check for the sufficient statistics ztz and ztX.
    ztz_distrib, ztX_distrib = encoder.get_sufficient_statistics()
    ztz_local = compute_ztz(z_hat, (n_times_atom, ))
    ztX_local = compute_ztX(z_hat, X)
    assert np.allclose(ztz_local, ztz_distrib)
    assert np.allclose(ztX_local, ztX_distrib)
def run_scaling_benchmark(max_n_workers, n_rep=1, random_state=None):
    '''Run DICOD with different n_workers for a 2D problem.

    The results of all the runs are collected in a DataFrame and pickled to
    the file given by ``get_save_file_name(ext='pkl')``.

    Parameters
    ----------
    max_n_workers: int
        Number of workers available for the benchmark. It must be strictly
        larger than the largest number of workers used by a single run.
    n_rep: int (default: 1)
        Number of seeds drawn, i.e. number of repetitions for each
        configuration.
    random_state: None or int or RandomState
        Seed for the random number generator.

    Raises
    ------
    AssertionError
        If `max_n_workers` is too low to run the largest configuration.
    '''
    # Parameters to generate the simulated problems
    n_atoms = 5
    atom_support = (8, 8)
    rng = check_random_state(random_state)

    # Parameters for the algorithm
    tol = 1e-3
    dicod_args = dict(z_positive=False, soft_lock='border', timeout=None,
                      max_iter=int(1e9), verbose=1)

    # Generate the list of parameter to call
    reg_list = [5e-1, 2e-1, 1e-1]
    list_n_workers = np.unique(np.logspace(0, np.log10(256), 15, dtype=int))
    list_n_workers = [n if n != 172 else 169 for n in list_n_workers]
    list_n_workers += [18 * 18, 20 * 20]
    list_strategies = ['lgcd', 'gcd']
    list_random_states = list(enumerate(rng.randint(MAX_INT, size=n_rep)))

    # list_n_workers is a plain list at this point and has no `.max()`
    # method: compute the maximum once with numpy and reuse it in the
    # error message.
    largest_n_workers = np.max(list_n_workers)
    assert largest_n_workers < max_n_workers, (
        f"This benchmark need to have more than {largest_n_workers} to run."
        f" max_n_workers was set to {max_n_workers}, which is too low.")

    it_args = itertools.product(list_n_workers, reg_list, list_strategies,
                                list_random_states)

    # run the benchmark
    run_one = delayed(run_one_scaling_2d)
    results = ParallelResourceBalance(max_workers=max_n_workers)(
        run_one(n_atoms=n_atoms, atom_support=atom_support, reg=reg,
                n_workers=n_workers, strategy=strategy, tol=tol,
                dicod_args=dicod_args, random_state=random_state)
        for (n_workers, reg, strategy, random_state) in it_args)

    # Save the results as a DataFrame
    results = pandas.DataFrame(results)
    results.to_pickle(get_save_file_name(ext='pkl'))
def test_stopping_criterion(n_workers, signal_support, atom_support):
    """Check that no update is larger than `tol` once `dicod` returns."""
    tol, reg = 1, 1
    n_atoms, n_channels = 10, 3

    rng = check_random_state(42)

    X = rng.randn(n_channels, *signal_support)
    D = rng.randn(n_atoms, n_channels, *atom_support)
    # Rescale each atom to unit l2-norm (sum over all the non-atom axes).
    atom_axes = tuple(range(1, D.ndim))
    atom_norms = np.sqrt(np.sum(D * D, axis=atom_axes, keepdims=True))
    D /= atom_norms

    z_hat, *_ = dicod(X, D, reg, tol=tol, n_workers=n_workers,
                      verbose=VERBOSE)

    # Recompute the optimal coordinate updates at the returned solution.
    _, dz_opt, _ = _init_beta(X, D, reg, z_i=z_hat)
    largest_update = abs(dz_opt).max()
    assert largest_update < tol
def get_problem(n_atoms, atom_support, seed):
    """Return the image from `get_mandril` and a dictionary of its patches.

    Parameters
    ----------
    n_atoms : int
        Number of patches extracted from the image.
    atom_support : tuple of int
        Size of the patches along each axis following the first one.
    seed : int, RandomState instance or None
        Seed used to draw the location of the patches.

    Returns
    -------
    X : ndarray
        Image returned by `get_mandril`.
    D : ndarray, shape (n_atoms, n_channels, *atom_support)
        Patches of `X`, each rescaled to unit l2-norm.
    """
    X = get_mandril()
    rng = check_random_state(seed)

    n_channels, *sig_shape = X.shape
    valid_shape = get_valid_shape(sig_shape, atom_support)

    # One row per atom, holding the first index of the patch on each axis.
    draws_per_axis = [rng.randint(size_ax, size=(n_atoms))
                      for size_ax in valid_shape]
    corners = np.c_[draws_per_axis].T

    D = np.empty(shape=(n_atoms, n_channels, *atom_support))
    for k, corner in enumerate(corners):
        # Keep the leading axes untouched and crop the trailing ones.
        crop = [slice(start, start + width)
                for start, width in zip(corner, atom_support)]
        D[k] = X[(Ellipsis, *crop)]

    # Rescale each atom to unit l2-norm.
    atom_axes = tuple(range(1, D.ndim))
    D /= np.sqrt(np.sum(D * D, axis=atom_axes, keepdims=True))

    return X, D
def test_ztz(valid_shape, atom_shape, sparsity):
    """Check `compute_ztz` through the squared norm of the reconstruction.

    The inner product between D and the correlation of ztz with D should be
    equal to the squared l2-norm of ``reconstruct(z, D)``.
    """
    n_atoms, n_channels = 7, 5
    random_state = None

    rng = check_random_state(random_state)

    z = rng.randn(n_atoms, *valid_shape)
    z *= rng.rand(*z.shape) < sparsity
    D = rng.randn(n_atoms, n_channels, *atom_shape)

    ztz = compute_ztz(z, atom_shape)

    # For each first index of ztz, one entry per atom of D, each holding one
    # 'valid' convolution per channel.
    per_atom_terms = []
    for ztz_row in ztz:
        row_terms = []
        for ztz_block, atom in zip(ztz_row, D):
            row_terms.append([
                fftconvolve(ztz_block, atom_channel, mode='valid')
                for atom_channel in atom
            ])
        per_atom_terms.append(row_terms)
    # Sum over the axis indexing the atoms of D.
    grad = np.sum(per_atom_terms, axis=1)
    cost = np.dot(D.ravel(), grad.ravel())

    X_hat = reconstruct(z, D)
    flat_X_hat = X_hat.ravel()

    assert np.isclose(cost, np.dot(flat_X_hat, flat_X_hat))
def test_ztz(valid_support, atom_support):
    """Compare the ztz and ztX returned by `dicod` with direct computations."""
    n_atoms, n_channels = 7, 5
    tol, reg = .5, .1
    random_state = None

    sig_support = get_full_support(valid_support, atom_support)

    rng = check_random_state(random_state)
    X = rng.randn(n_channels, *sig_support)
    D = rng.randn(n_atoms, n_channels, *atom_support)
    # Rescale D by the root sum of squares taken over axes 1 and 2.
    scale = np.sqrt(np.sum(D * D, axis=(1, 2), keepdims=True))
    D /= scale

    z_hat, ztz, ztX, *_ = dicod(
        X, D, reg, tol=tol, n_workers=N_WORKERS, return_ztz=True,
        verbose=VERBOSE
    )

    expected_ztz = compute_ztz(z_hat, atom_support)
    assert np.allclose(expected_ztz, ztz)

    expected_ztX = compute_ztX(z_hat, X)
    assert np.allclose(expected_ztX, ztX)
def test_cost(valid_support, atom_support):
    """Check the last cost logged by `dicod` against `compute_objective`."""
    n_atoms, n_channels = 7, 5
    tol, reg = .5, 0
    random_state = None

    sig_support = get_full_support(valid_support, atom_support)
    rng = check_random_state(random_state)

    D = rng.randn(n_atoms, n_channels, *atom_support)
    # Rescale D by the root sum of squares taken over axes 1 and 2.
    D /= np.sqrt(np.sum(D * D, axis=(1, 2), keepdims=True))

    # Random activations with a random support, used as a warm start.
    z = rng.randn(n_atoms, *valid_support)
    z *= rng.rand(n_atoms, *valid_support) > .5

    X = rng.randn(n_channels, *sig_support)

    # The cost log is the second to last value returned by dicod.
    z_hat, *_, pobj, _ = dicod(
        X, D, reg, z0=z, tol=tol, n_workers=N_WORKERS, max_iter=1000,
        freeze_support=True, verbose=VERBOSE
    )

    # The cost is the third field of the last logged entry.
    final_entry = pobj[-1]
    cost = final_entry[2]
    assert np.isclose(cost, compute_objective(X, z_hat, D, reg))
def compute_z_hat(self):
    """Run the coordinate descent loop of this worker.

    At each iteration, the worker processes the incoming messages, moves to
    the next local segment and, if this segment is active, selects a
    coordinate with `_select_coordinate`. The coordinate is updated only if
    its update is larger than `self.tol` and it is not soft-locked. In this
    case, the touched neighboring workers are notified.

    The loop stops when the worker is notified with STATUS_STOP after local
    convergence, when an update reaches a magnitude of 1e3 (diverging), when
    the timeout is reached or after `self.max_iter` iterations.

    Returns
    -------
    n_coordinate_updates : int
        Number of coordinate updates performed by this worker.
    runtime : float
        Time in seconds between the start of the loop and the end of the
        final worker synchronization.

    The same two values are also gathered on the parent communicator.
    """

    # Initialization of the algorithm variables
    random_state = check_random_state(self.random_state)
    i_seg = -1
    n_coordinate_updates = 0
    accumulator = 0
    k0, pt0 = 0, None
    self.n_paused_worker = 0

    # compute the number of coordinates
    n_atoms, *_ = self.D.shape
    seg_in_shape = self.workers_segments.get_seg_shape(self.rank,
                                                       inner=True)
    n_coordinates = n_atoms * np.prod(seg_in_shape)

    self.init_cd_variables()

    diverging = False
    # Debugging hook: drop into ipdb when running interactively with a
    # single job.
    if flags.INTERACTIVE_PROCESSES and self.n_jobs == 1:
        import ipdb
        ipdb.set_trace()  # noqa: E702

    self.t_start = t_start = time.time()
    if self.timeout is not None:
        deadline = t_start + self.timeout
    else:
        deadline = None

    for ii in range(self.max_iter):
        # Display the progress of the algorithm
        self.progress(ii, max_ii=self.max_iter, unit="iterations")

        # Process incoming messages
        self.process_messages()

        # Increment the segment and select the coordinate to update
        try:
            i_seg = self.local_segments.increment_seg(i_seg)
        except ZeroDivisionError:
            # Print the segmentation parameters to help debugging before
            # re-raising the error.
            print(self.local_segments.signal_shape,
                  self.local_segments.n_seg_per_axis)
            raise
        if self.local_segments.is_active_segment(i_seg):
            k0, pt0, dz = _select_coordinate(self.dz_opt, self.dE,
                                             self.local_segments, i_seg,
                                             strategy=self.strategy,
                                             random_state=random_state)

            assert self.workers_segments.is_contained_coordinate(
                self.rank, pt0, inner=True), pt0
        else:
            # Inactive segment: no candidate update for this iteration.
            k0, pt0, dz = None, None, 0

        # update the accumulator for 'random' strategy
        accumulator = max(abs(dz), accumulator)

        # If requested, check that the update chosen only have an impact on
        # the segment and its overlap area.
        if flags.CHECK_UPDATE_CONTAINED and pt0 is not None:
            self.workers_segments.check_area_contained(
                self.rank, pt0, self.overlap)

        # Check if the coordinate is soft-locked or not.
        soft_locked = False
        if (pt0 is not None and abs(dz) > self.tol and
                self.soft_lock != 'none'):
            # With soft_lock="corner", more than one touched overlap slice
            # is required to consider locking the coordinate.
            n_lock = 1 if self.soft_lock == "corner" else 0
            lock_slices = self.workers_segments.get_touched_overlap_slices(
                self.rank, pt0, np.array(self.overlap) + 1)
            # Only soft lock in the corners
            if len(lock_slices) > n_lock:
                max_on_lock = 0
                for u_slice in lock_slices:
                    max_on_lock = max(
                        abs(self.dz_opt[u_slice]).max(), max_on_lock)
                # Locked when a larger update is pending in one of the
                # touched overlap slices.
                soft_locked = max_on_lock > abs(dz)

        # Update the selected coordinate and beta, only if the update is
        # greater than the convergence tolerance and is contained in the
        # worker. If the update is not in the worker, this will
        # effectively work has a soft lock to prevent interferences.
        if abs(dz) > self.tol and not soft_locked:
            n_coordinate_updates += 1

            # update the selected coordinate and beta
            self.coordinate_update(k0, pt0, dz)

            # Notify neighboring workers of the update if needed.
            pt_global = self.workers_segments.get_global_coordinate(
                self.rank, pt0)
            workers = self.workers_segments.get_touched_segments(
                pt=pt_global, radius=np.array(self.overlap) + 1)
            # The message holds the atom index, the global position and the
            # value of the update, stored as doubles.
            msg = np.array([k0, *pt_global, dz], 'd')
            self.notify_neighbors(msg, workers)

            if self.timing:
                t_update = time.time() - t_start
                self._log_updates.append(
                    (t_update, ii, self.rank, k0, pt_global, dz))

        # Inactivate the current segment if the magnitude of the update is
        # too small. This only work when using LGCD.
        if abs(dz) <= self.tol and self.strategy == "greedy":
            self.local_segments.set_inactive_segments(i_seg)

        # When workers are diverging, finish the worker to avoid having to
        # wait until max_iter for stopping the algorithm.
        if abs(dz) >= 1e3:
            self.info("diverging worker")
            self.wait_status_changed(status=constants.STATUS_FINISHED)
            diverging = True
            break

        # Check the stopping criterion and if we have locally converged,
        # wait either for an incoming message or for full convergence.
        if _check_convergence(self.local_segments, self.tol, ii,
                              self.dz_opt, n_coordinates, self.strategy,
                              accumulator=accumulator):

            if flags.CHECK_ACTIVE_SEGMENTS:
                # Debug check: all the updates in the inner part of the
                # local segments should be below the tolerance.
                inner_slice = (Ellipsis, ) + tuple([
                    slice(start, end)
                    for start, end in self.local_segments.inner_bounds
                ])
                assert np.all(abs(self.dz_opt[inner_slice]) <= self.tol)
            if self.check_no_transitting_message():
                status = self.wait_status_changed()
                if status == constants.STATUS_STOP:
                    self.debug(
                        "LGCD converged with {} coordinate "
                        "updates", n_coordinate_updates)
                    break

        # Check is we reach the timeout
        if deadline is not None and time.time() >= deadline:
            self.stop_before_convergence("Reached timeout",
                                         n_coordinate_updates)
            break
    else:
        # The loop ended without any break: max_iter has been reached.
        self.stop_before_convergence("Reached max_iter",
                                     n_coordinate_updates)

    self.synchronize_workers()
    assert diverging or self.check_no_transitting_message()
    runtime = time.time() - t_start

    # Send the run statistics to the parent process.
    comm = MPI.Comm.Get_parent()
    comm.gather([n_coordinate_updates, runtime], root=0)

    return n_coordinate_updates, runtime
def compute_z_hat(self):
    """Run the coordinate descent loop of this worker.

    At each iteration, the worker processes the incoming messages, moves to
    the next local segment and, if this segment is active, selects a
    coordinate with `_select_coordinate`. The coordinate is updated only if
    its update is larger than `self.tol` and it is not soft-locked. In this
    case, the touched neighboring workers are notified.

    The loop stops when the worker is notified with STATUS_STOP after local
    convergence, when an update reaches a magnitude of 1e3 (diverging), when
    the timeout is reached or after `self.max_iter` iterations.

    Nothing is returned: the run statistics (number of iterations and
    updates, runtime and timing of the selection and update steps) are
    reported with `self.return_run_statistics`.
    """

    # compute the number of coordinates
    n_atoms, *_ = self.D.shape
    seg_in_support = self.workers_segments.get_seg_support(self.rank,
                                                           inner=True)
    n_coordinates = n_atoms * np.prod(seg_in_support)

    # Initialization of the algorithm variables
    rng = check_random_state(self.random_state)
    order = None
    # An explicit order iterator is only built for the cyclic and random
    # strategies.
    if self.strategy in ['cyclic', 'cyclic-r', 'random']:
        offset = np.r_[0, self.local_segments.inner_bounds[:, 0]]
        order = get_order_iterator((n_atoms, *seg_in_support),
                                   strategy=self.strategy,
                                   random_state=rng, offset=offset)

    i_seg = -1
    dz = 1
    n_coordinate_updates = 0
    accumulator = 0
    k0, pt0 = 0, None
    self.n_paused_worker = 0

    t_local_init = self.init_cd_variables()

    diverging = False
    # Debugging hook: drop into ipdb when running interactively with a
    # single worker.
    if flags.INTERACTIVE_PROCESSES and self.n_workers == 1:
        import ipdb
        ipdb.set_trace()  # noqa: E702

    self.t_start = t_start = time.time()
    # t_run accumulates the time spent selecting and updating coordinates.
    t_run = 0
    t_select_coord, t_update_coord = [], []
    if self.timeout is not None:
        deadline = t_start + self.timeout
    else:
        deadline = None

    for ii in range(self.max_iter):
        # Display the progress of the algorithm
        self.progress(ii, max_ii=self.max_iter, unit="iterations",
                      extra_msg=abs(dz))

        # Process incoming messages
        self.process_messages()

        # Increment the segment and select the coordinate to update
        i_seg = self.local_segments.increment_seg(i_seg)
        if self.local_segments.is_active_segment(i_seg):
            t_start_selection = time.time()
            k0, pt0, dz = _select_coordinate(self.dz_opt, self.dE,
                                             self.local_segments, i_seg,
                                             strategy=self.strategy,
                                             order=order)
            selection_duration = time.time() - t_start_selection
            t_select_coord.append(selection_duration)
            t_run += selection_duration
        else:
            # Inactive segment: no candidate update for this iteration.
            k0, pt0, dz = None, None, 0

        # update the accumulator for 'random' strategy
        accumulator = max(abs(dz), accumulator)

        # If requested, check that the update chosen only have an impact on
        # the segment and its overlap area.
        if flags.CHECK_UPDATE_CONTAINED and pt0 is not None:
            self.workers_segments.check_area_contained(
                self.rank, pt0, self.overlap)

        # Check if the coordinate is soft-locked or not.
        soft_locked = False
        if (pt0 is not None and abs(dz) > self.tol and
                self.soft_lock != 'none'):
            # With soft_lock="corner", more than one touched overlap slice
            # is required to consider locking the coordinate.
            n_lock = 1 if self.soft_lock == "corner" else 0
            lock_slices = self.workers_segments.get_touched_overlap_slices(
                self.rank, pt0, np.array(self.overlap) + 1)
            # Only soft lock in the corners
            if len(lock_slices) > n_lock:
                max_on_lock = max([
                    abs(self.dz_opt[u_slice]).max()
                    for u_slice in lock_slices
                ])
                # Locked when a larger update is pending in one of the
                # touched overlap slices.
                soft_locked = max_on_lock > abs(dz)

        # Update the selected coordinate and beta, only if the update is
        # greater than the convergence tolerance and is contained in the
        # worker. If the update is not in the worker, this will
        # effectively work has a soft lock to prevent interferences.
        if abs(dz) > self.tol and not soft_locked:
            t_start_update = time.time()

            # update the selected coordinate and beta
            self.coordinate_update(k0, pt0, dz)

            # Notify neighboring workers of the update if needed.
            pt_global = self.workers_segments.get_global_coordinate(
                self.rank, pt0)
            workers = self.workers_segments.get_touched_segments(
                pt=pt_global, radius=np.array(self.overlap) + 1)
            # The message holds the atom index, the global position and the
            # value of the update, stored as doubles.
            msg = np.array([k0, *pt_global, dz], 'd')
            self.notify_neighbors(msg, workers)

            # Logging of the time and the cost function if necessary
            update_duration = time.time() - t_start_update
            n_coordinate_updates += 1
            t_run += update_duration
            t_update_coord.append(update_duration)

            if self.timing:
                self._log_updates.append(
                    (t_run, ii, self.rank, k0, pt_global, dz))

        # Inactivate the current segment if the magnitude of the update is
        # too small. This only work when using LGCD.
        if abs(dz) <= self.tol and self.strategy == "greedy":
            self.local_segments.set_inactive_segments(i_seg)

        # When workers are diverging, finish the worker to avoid having to
        # wait until max_iter for stopping the algorithm.
        if abs(dz) >= 1e3:
            self.info("diverging worker")
            self.wait_status_changed(status=constants.STATUS_FINISHED)
            diverging = True
            break

        # Check the stopping criterion and if we have locally converged,
        # wait either for an incoming message or for full convergence.
        if _check_convergence(self.local_segments, self.tol, ii,
                              self.dz_opt, n_coordinates, self.strategy,
                              accumulator=accumulator):

            if flags.CHECK_ACTIVE_SEGMENTS:
                # Debug check: all the updates in the inner part of the
                # local segments should be below the tolerance.
                inner_slice = (Ellipsis, ) + tuple([
                    slice(start, end)
                    for start, end in self.local_segments.inner_bounds
                ])
                assert np.all(abs(self.dz_opt[inner_slice]) <= self.tol)
            if self.check_no_transitting_message():
                status = self.wait_status_changed()
                if status == constants.STATUS_STOP:
                    self.debug(
                        "LGCD converged with {} iterations ({} "
                        "updates)", ii + 1, n_coordinate_updates)
                    break
            # else:
            #     time.sleep(.001)

        # Check is we reach the timeout
        if deadline is not None and time.time() >= deadline:
            self.stop_before_convergence("Reached timeout", ii + 1,
                                         n_coordinate_updates)
            break
    else:
        # The loop ended without any break: max_iter has been reached.
        self.stop_before_convergence("Reached max_iter", ii + 1,
                                     n_coordinate_updates)

    self.synchronize_workers(with_main=True)
    assert diverging or self.check_no_transitting_message()
    runtime = time.time() - t_start

    if flags.CHECK_FINAL_BETA:
        worker_check_beta(self.rank, self.workers_segments, self.beta,
                          self.D.shape)

    # NOTE(review): unlike t_update_coord below, the mean of t_select_coord
    # is not guarded against an empty list, in which case np.mean returns
    # nan and emits a RuntimeWarning -- confirm this is intended.
    t_select_coord = np.mean(t_select_coord)
    t_update_coord = (np.mean(t_update_coord) if len(t_update_coord) > 0
                      else None)
    # NOTE(review): `ii` is unbound here if self.max_iter is 0.
    self.return_run_statistics(ii=ii, t_run=t_run,
                               n_coordinate_updates=n_coordinate_updates,
                               runtime=runtime,
                               t_local_init=t_local_init,
                               t_select_coord=t_select_coord,
                               t_update_coord=t_update_coord)
def coordinate_descent(X_i, D, reg, z0=None, DtD=None, n_seg='auto',
                       strategy='greedy', tol=1e-5, max_iter=100000,
                       timeout=None, z_positive=False, freeze_support=False,
                       return_ztz=False, timing=False, random_state=None,
                       verbose=0):
    """Coordinate Descent Algorithm for 2D convolutional sparse coding.

    Parameters
    ----------
    X_i : ndarray, shape (n_channels, *sig_support)
        Image to encode on the dictionary D
    D : ndarray, shape (n_atoms, n_channels, *atom_support)
        Current dictionary for the sparse coding
    reg : float
        Regularization parameter
    z0 : ndarray, shape (n_atoms, *valid_support) or None
        Warm start value for z_hat. If not present, z_hat is initialized to 0.
    DtD : ndarray, shape (n_atoms, n_atoms, 2 * valid_support - 1) or None
        Warm start value for DtD. If not present, it is computed on init.
    n_seg : int or 'auto'
        Number of segments to use for each dimension. If set to 'auto' use
        segments of twice the size of the dictionary.
    strategy : str in {strategies}
        Coordinate selection scheme for the coordinate descent. If set to
        'greedy'|'gs-r', the coordinate with the largest value for dz_opt is
        selected. If set to 'random, the coordinate is chosen uniformly on the
        segment. If set to 'gs-q', the value that reduce the most the cost
        function is selected. In this case, dE must holds the value of this
        cost reduction.
    tol : float
        Tolerance for the minimal update size in this algorithm.
    max_iter : int
        Maximal number of iteration run by this algorithm.
    timeout : float or None
        If not None, the main loop is stopped once this number of seconds
        has elapsed since its start.
    z_positive : boolean
        If set to true, the activations are constrained to be positive.
    freeze_support : boolean
        If set to True, only update the coefficient that are non-zero in z0.
    return_ztz : boolean
        If True, returns the constants ztz and ztX, used to compute D-updates.
    timing : boolean
        If set to True, log the cost and timing information.
    random_state : None or int or RandomState
        current random state to seed the random number generator.
    verbose : int
        Verbosity level of the algorithm.

    Return
    ------
    z_hat : ndarray, shape (n_atoms, *valid_support)
        Activation associated to X_i for the given dictionary D
    ztz, ztX : ndarray or None
        Constants computed with compute_ztz and compute_ztX if return_ztz is
        True, None otherwise.
    p_obj : list
        Log of the cost. Entries are only added during the run if timing is
        True. A last entry with the number of updates, the run time and the
        final cost is always appended.
    run_statistics : dict
        Number of iterations and updates, and timing information of the run.

    Raises
    ------
    ValueError
        If strategy is not in STRATEGIES.
    NotImplementedError
        If strategy is 'gs-q'.
    """
    n_channels, *sig_support = X_i.shape
    n_atoms, n_channels, *atom_support = D.shape
    valid_support = get_valid_support(sig_support, atom_support)

    if strategy not in STRATEGIES:
        raise ValueError("'The coordinate selection strategy should be in "
                         "{}. Got '{}'.".format(STRATEGIES, strategy))

    # compute sizes for the segments for LGCD. Auto gives segments of size
    # twice the support of the atoms.
    if n_seg == 'auto':
        n_seg = np.array(valid_support) // (2 * np.array(atom_support) - 1)
        n_seg = tuple(np.maximum(1, n_seg))
    segments = Segmentation(n_seg, signal_support=valid_support)

    # Pre-compute constants for maintaining the auxiliary variable beta and
    # compute the coordinate update values.
    constants = {}
    constants['norm_atoms'] = compute_norm_atoms(D)
    if DtD is None:
        constants['DtD'] = compute_DtD(D)
    else:
        constants['DtD'] = DtD

    # Initialization of the algorithm variables
    i_seg = -1
    accumulator = 0
    if z0 is None:
        z_hat = np.zeros((n_atoms,) + valid_support)
    else:
        # Work on a copy so that the warm start array is left unchanged.
        z_hat = np.copy(z0)
    n_coordinates = z_hat.size

    # Get a random number generator from the given random_state
    rng = check_random_state(random_state)
    order = None
    # An explicit order iterator is only built for the cyclic and random
    # strategies.
    if strategy in ['cyclic', 'cyclic-r', 'random']:
        order = get_order_iterator(z_hat.shape, strategy=strategy,
                                   random_state=rng)

    t_start_init = time.time()
    return_dE = strategy == "gs-q"
    beta, dz_opt, dE = _init_beta(X_i, D, reg, z_i=z0, constants=constants,
                                  z_positive=z_positive,
                                  return_dE=return_dE)
    if strategy == "gs-q":
        raise NotImplementedError("This is still WIP")

    if freeze_support:
        # Mask of the coefficients that must stay at zero.
        # NOTE(review): this assumes z0 is not None when freeze_support is
        # True -- confirm against the callers.
        freezed_support = z0 == 0
        dz_opt[freezed_support] = 0
    else:
        freezed_support = None

    p_obj, next_log_iter = [], 1
    t_init = time.time() - t_start_init
    if timing:
        p_obj.append((0, t_init, 0, compute_objective(X_i, z_hat, D, reg)))

    n_coordinate_updates = 0
    # t_run accumulates the time spent selecting and updating coordinates.
    t_run = 0
    t_select_coord, t_update_coord = [], []
    t_start = time.time()
    if timeout is not None:
        deadline = t_start + timeout
    else:
        deadline = None
    for ii in range(max_iter):
        if ii % 1000 == 0 and verbose > 0:
            print("\r[LGCD:PROGRESS] {:.0f}s - {:7.2%} iterations"
                  .format(t_run, ii / max_iter), end='', flush=True)

        i_seg = segments.increment_seg(i_seg)
        if segments.is_active_segment(i_seg):
            t_start_selection = time.time()
            k0, pt0, dz = _select_coordinate(dz_opt, dE, segments, i_seg,
                                             strategy=strategy, order=order)
            selection_duration = time.time() - t_start_selection
            t_select_coord.append(selection_duration)
            t_run += selection_duration
        else:
            # Inactive segment: no candidate update for this iteration.
            dz = 0

        accumulator = max(abs(dz), accumulator)

        # Update the selected coordinate and beta, only if the update is
        # greater than the convergence tolerance.
        if abs(dz) > tol:
            t_start_update = time.time()

            # update the current solution estimate and beta
            beta, dz_opt, dE = coordinate_update(
                k0, pt0, dz, beta=beta, dz_opt=dz_opt, dE=dE, z_hat=z_hat,
                D=D, reg=reg, constants=constants, z_positive=z_positive,
                freezed_support=freezed_support)
            # Re-activate the segments touched by this update.
            touched_segs = segments.get_touched_segments(
                pt=pt0, radius=atom_support)
            n_changed_status = segments.set_active_segments(touched_segs)

            # Logging of the time and the cost function if necessary
            update_duration = time.time() - t_start_update
            n_coordinate_updates += 1
            t_run += update_duration
            t_update_coord.append(update_duration)
            if timing and ii + 1 >= next_log_iter:
                p_obj.append((ii + 1, t_run, np.sum(t_select_coord),
                              compute_objective(X_i, z_hat, D, reg)))
                # Log at geometrically spaced iterations.
                next_log_iter = next_log_iter * 1.3

            # If debug flag CHECK_ACTIVE_SEGMENTS is set, check that all
            # inactive segments should be inactive
            if flags.CHECK_ACTIVE_SEGMENTS and n_changed_status:
                segments.test_active_segment(dz_opt, tol)

        elif strategy in ["greedy", 'gs-r']:
            # The update for this segment is below tol: inactivate it.
            segments.set_inactive_segments(i_seg)

        # check stopping criterion
        if _check_convergence(segments, tol, ii, dz_opt, n_coordinates,
                              strategy, accumulator=accumulator):
            assert np.all(abs(dz_opt) <= tol)
            if verbose > 0:
                print("\r[LGCD:INFO] converged in {} iterations ({} updates)"
                      .format(ii + 1, n_coordinate_updates))

            break

        # Check is we reach the timeout
        if deadline is not None and time.time() >= deadline:
            if verbose > 0:
                print("\r[LGCD:INFO] Reached timeout. Done {} iterations "
                      "({} updates). Max of |dz|={}."
                      .format(ii + 1, n_coordinate_updates,
                              abs(dz_opt).max()))
            break
    else:
        # The loop ended without any break: max_iter has been reached.
        if verbose > 0:
            print("\r[LGCD:INFO] Reached max_iter. Done {} coordinate "
                  "updates. Max of |dz|={}."
                  .format(n_coordinate_updates, abs(dz_opt).max()))

    # NOTE(review): this summary is printed regardless of verbose, and
    # np.mean returns nan with a RuntimeWarning when one of the lists is
    # empty (e.g. no update was performed) -- confirm this is intended.
    print(f"\r[LGCD:{strategy}] "
          f"t_select={np.mean(t_select_coord):.3e}s "
          f"t_update={np.mean(t_update_coord):.3e}s"
          )

    runtime = time.time() - t_start
    if verbose > 0:
        print("\r[LGCD:INFO] done in {:.3f}s ({:.3f}s)"
              .format(runtime, t_run))

    ztz, ztX = None, None
    if return_ztz:
        ztz = compute_ztz(z_hat, atom_support)
        ztX = compute_ztX(z_hat, X_i)

    # Always log the final cost as the last entry of p_obj.
    p_obj.append([n_coordinate_updates, t_run,
                  compute_objective(X_i, z_hat, D, reg)])

    # NOTE(review): `ii` is unbound here if max_iter is 0.
    run_statistics = dict(iterations=ii + 1, runtime=runtime, t_init=t_init,
                          t_run=t_run, n_updates=n_coordinate_updates,
                          t_select=np.mean(t_select_coord),
                          t_update=np.mean(t_update_coord))

    return z_hat, ztz, ztX, p_obj, run_statistics
def _report_dictionary_score(D, D_hat, tag, label):
    """Correlate D_hat with D, print the two scores and return them."""
    corr = evaluate_D_hat(D, D_hat)
    score = corr.max(axis=1).mean()
    score_2 = compute_best_assignment(corr)
    print(f"[{tag}] {label} score: {score}, {score_2}")
    return corr, score, score_2


def evaluate_one(fname, std, n_atoms=None, reg=.2, n_jobs=10, window=True,
                 random_state=None):
    """Evaluate the dictionaries recovered from one noisy text image.

    The image loaded from `fname` is corrupted with gaussian noise. Then,
    four dictionaries are scored against the true dictionary `D`: a random
    one, the initialization from `get_D_init`, the result of `compute_cdl`
    and the result of `compute_dl`.

    Parameters
    ----------
    fname : str
        Name of the input file, passed to `get_input`. The part between the
        last underscore and the first dot is used in the printed tag. If it
        contains 'PAMI', the true dictionary is padded by 4 pixels on each
        side of its last two axes.
    std : float
        Noise level, relative to the standard deviation of the image.
    n_atoms : int or None (default: None)
        Number of atoms to learn. If None, use the number of atoms in `D`.
    reg : float (default: .2)
        Regularization parameter passed to `compute_cdl`. `compute_dl` is
        always run with ``reg=1e-1``.
    n_jobs : int (default: 10)
        Forwarded to `compute_cdl` and `compute_dl`.
    window : boolean (default: True)
        Forwarded to `get_D_init` and `compute_cdl`.
    random_state : int, RandomState instance or None (default)
        Seed for the random number generator.

    Returns
    -------
    results : dict
        The dictionaries, their correlation with `D`, the scores, the
        metadata of the two learning algorithms and the run parameters.
    """
    rng = check_random_state(random_state)

    i = fname.split('.')[0].split('_')[-1]

    X, D, text_length = get_input(fname)
    X += std * X.std() * rng.randn(*X.shape)

    if 'PAMI' in fname:
        D = np.pad(D, [(0, 0), (0, 0), (4, 4), (4, 4)])

    n_atoms = D.shape[0] if n_atoms is None else n_atoms
    atom_support = np.array(D.shape[-2:])

    tag = f"l={text_length}_std={std}_{i}"
    if window:
        tag = f"{tag}_win"

    D_init = get_D_init(X, n_atoms, atom_support, strategy='patch',
                        window=window, noise_level=.1, random_state=rng)

    D_rand = prox_d(rng.rand(*D_init.shape))
    corr_rand, score_rand, score_rand_2 = _report_dictionary_score(
        D, D_rand, tag, 'Rand')

    corr_init, score_init, score_init_2 = _report_dictionary_score(
        D, D_init, tag, 'Init')

    # Forward the `reg` argument instead of a hard-coded value, which made
    # the parameter of this function ineffective.
    D_cdl, meta_cdl = compute_cdl(X, n_atoms, atom_support, D_init, reg=reg,
                                  window=window, n_jobs=n_jobs)
    corr_cdl, score_cdl, score_cdl_2 = _report_dictionary_score(
        D, D_cdl, tag, 'CDL')

    D_dl, meta_dl = compute_dl(X, n_atoms, atom_support, reg=1e-1,
                               n_jobs=n_jobs)
    corr_dl, score_dl, score_dl_2 = _report_dictionary_score(
        D, D_dl, tag, 'DL')

    return dict(
        text_length=int(text_length), noise_level=std, D=D,
        D_rand=D_rand, corr_rand=corr_rand, score_rand=score_rand,
        D_init=D_init, corr_init=corr_init, score_init=score_init,
        D_cdl=D_cdl, corr_cdl=corr_cdl, score_cdl=score_cdl,
        D_dl=D_dl, corr_dl=corr_dl, score_dl=score_dl,
        score_rand_2=score_rand_2, score_init_2=score_init_2,
        score_cdl_2=score_cdl_2, score_dl_2=score_dl_2,
        meta_dl=meta_dl, meta_cdl=meta_cdl, n_atoms=n_atoms,
        filename=fname,
    )
def get_D_init(X, n_atoms, atom_support, strategy='patch', window=True,
               noise_level=0.1, random_state=None):
    """Build the starting dictionary for a dictionary learning run.

    Parameters
    ----------
    X : ndarray, shape (n_channels, *signal_support)
        Signal to be encoded. Its first axis gives the number of channels.
    n_atoms : int
        Number of atoms in the dictionary.
    atom_support : tuple (int, int)
        Support of the atoms.
    strategy : str in {'patch', 'random'} (default: 'patch')
        How the raw atoms are obtained:

        - 'random': iid coefficients drawn with ``rng.rand``, i.e. in [0, 1)
        - 'patch': atoms returned by `init_dictionary` called on `X`.
    window : boolean (default: True)
        If True, the atoms are multiplied in-place by
        ``tukey_window(D_init.shape[-2:])`` before being returned.
    noise_level : float (default: .1)
        If larger than 0, gaussian noise scaled by ``noise_level`` times the
        per-atom standard deviation (taken over the last two axes) is added
        and the result is passed through `prox_d` again. This helps escaping
        sub-optimal states where one atom is used only in one place with
        strategy='patch'.
    random_state : int, RandomState instance or None (default)
        Seed of the random number generator used for the random atoms, the
        patch extraction and the additive noise.

    Returns
    -------
    D_init : ndarray, shape (n_atoms, n_channels, *atom_support)
        Initial dictionary.

    Raises
    ------
    NotImplementedError
        If `strategy` is neither 'patch' nor 'random'.
    """
    rng = check_random_state(random_state)
    n_channels = X.shape[0]

    if strategy not in ('random', 'patch'):
        raise NotImplementedError('strategy should be one of {patch, random}')

    if strategy == 'random':
        atoms = rng.rand(n_atoms, n_channels, *atom_support)
    else:
        atoms = init_dictionary(X, n_atoms=n_atoms,
                                atom_support=atom_support, random_state=rng)

    # Normalize the raw atoms.
    atoms = prox_d(atoms)

    # Perturb the atoms slightly; this matters mostly for extracted patches
    # and has little influence on the random initialization.
    if noise_level > 0:
        scale = noise_level * atoms.std(axis=(-1, -2), keepdims=True)
        perturbation = scale * rng.randn(*atoms.shape)
        atoms = prox_d(atoms + perturbation)

    # A windowed algorithm expects an already windowed dictionary.
    if window:
        support = atoms.shape[-2:]
        atoms *= tukey_window(support)[None, None]

    return atoms
help='Number of atoms to learn') parser.add_argument('--window', action='store_true', help='If this flag is set, apply a window on the atoms' ' to promote border to 0.') parser.add_argument('--seed', type=int, default=None, help='Seed for the random number generator. ' 'Default to None.') parser.add_argument('--PAMI', action='store_true', help='Run the CDL on text with PAMI letters.') args = parser.parse_args() rng = check_random_state(args.seed) if args.PAMI: from benchmarks.dicodile_text_plot import plot_dictionary std = 3 std = .0001 fname = 'text_4_5000_PAMI.npz' res_item = evaluate_one(fname, std, n_atoms=args.n_atoms, n_jobs=args.n_jobs, window=args.window, random_state=rng) now = datetime.now() t_tag = now.strftime('%y-%m-%d_%Hh%M') save_name = OUTPUT_DIR / f'{BASE_FILE_NAME}_PAMI_{t_tag}.pkl'
def _select_coordinate(dz_opt, dE, segments, i_seg, strategy='greedy',
                       random_state=None):
    """Pick a coordinate to update

    Parameters
    ----------
    dz_opt : ndarray, shape (n_atoms, *valid_shape)
        Difference between the current value and the optimal value for each
        coordinate.
    dE : ndarray, shape (n_atoms, *valid_shape) or None
        Value of the reduction of the cost when moving a given coordinate to
        the optimal value dz_opt. This is only necessary when strategy is
        'gs-q'.
    segments : dicod.utils.Segmentation
        Segmentation info for LGCD
    i_seg : int
        Current segment indices in the Segmentation object.
    strategy : str in { 'greedy' | 'random' | 'gs-r' | 'gs-q' }
        Coordinate selection scheme for the coordinate descent. If set to
        'greedy'|'gs-r', the coordinate of the segment with the largest
        absolute value of dz_opt is selected. If set to 'random', the atom
        index is drawn uniformly and each spatial index is drawn uniformly
        within `segments.inner_bounds`. If set to 'gs-q', the coordinate of
        the segment with the largest absolute value of dE is selected; in
        this case, dE must hold the value of the cost reduction.
    random_state : None or int or RandomState
        current random state to seed the random number generator. Only used
        with strategy='random'.

    Returns
    -------
    k0 : int
        Index of the selected atom.
    pt0 : tuple of int
        Position of the selected coordinate, as returned by
        `segments.get_global_coordinate` for the non-random strategies.
    dz : float
        Value of `dz_opt` at the selected coordinate.

    Raises
    ------
    ValueError
        If `strategy` is not one of the supported selection schemes.
    """
    if strategy == 'random':
        rng = check_random_state(random_state)
        # Draw the atom first, then one index per spatial dimension, so the
        # sequence of random draws is well defined.
        k0 = rng.randint(dz_opt.shape[0])
        pt0 = tuple(rng.randint(start, end)
                    for start, end in segments.inner_bounds)
    elif strategy in ('greedy', 'gs-r', 'gs-q'):
        # 'gs-q' ranks the coordinates by cost reduction, the other greedy
        # schemes by the magnitude of the update itself.
        criterion = dE if strategy == 'gs-q' else dz_opt
        seg_slice = segments.get_seg_slice(i_seg, inner=True)
        criterion_seg = criterion[seg_slice]
        i0 = abs(criterion_seg).argmax()
        k0, *pt0 = np.unravel_index(i0, criterion_seg.shape)
        # The argmax is relative to the segment; convert it back.
        pt0 = segments.get_global_coordinate(i_seg, pt0)
    else:
        # Literal braces are doubled so that formatting the message cannot
        # fail and mask the ValueError.
        raise ValueError(
            "The coordinate selection strategy should be in "
            f"{{'greedy' | 'random' | 'gs-r' | 'gs-q'}}. Got '{strategy}'."
        )

    # Get the coordinate update value
    dz = dz_opt[(k0, *pt0)]
    return k0, pt0, dz
def run_scaling_1d_benchmark(strategies, n_rep=1, max_workers=75, timeout=None,
                             soft_lock='none', list_n_times=[151, 750],
                             list_reg=[2e-1, 5e-1], random_state=None,
                             collect=False):
    """Run `run_one` over a grid of problems and numbers of workers.

    Every combination of number of workers, strategy, regularization,
    problem size and seed is passed to `run_one`. The collected results are
    stored as a pickled DataFrame at ``get_save_file_name(ext='pkl')``.

    Parameters
    ----------
    strategies: list of sequences
        Strategies to benchmark. Only the first item of each entry is used,
        as the strategy name given to `run_one`.
    n_rep: int (default: 1)
        Number of seeds drawn, i.e. number of repetitions of each setting.
    max_workers: int (default: 75)
        Largest number of workers. The numbers of workers are the unique
        integer values of 15 log-spaced points between 1 and `max_workers`,
        visited from the largest to the smallest.
    timeout: int (default: None)
        Stored under the 'timeout' key of the `dicod_args` given to
        `run_one`.
    soft_lock: str in {'none', 'border'}
        Soft-lock mechanism, forwarded to `run_one`.
    list_n_times: list of int
        Size of the generated problems
    list_reg: list of float
        Regularization parameter of the considered problem
    random_state: None or int or RandomState
        Seed for the random number generator used to draw the `n_rep` seeds.
    collect: bool
        If set to True, the settings that are not found in the store backend
        of `run_one` are skipped, and the fraction of settings that were
        found is printed at the end.
    """
    rng = check_random_state(random_state)

    # Arguments shared by all the calls: solver tolerance and options,
    # followed by the parameters of the simulated problems.
    common_args = dict(
        tol=1e-8,
        soft_lock=soft_lock,
        dicod_args=dict(timing=False, timeout=timeout, max_iter=int(5e8),
                        verbose=2),
        n_times_atom=250,
        n_atoms=25,
        n_channels=7,
        noise_level=1,
    )

    # Grid of settings, starting with the largest number of workers.
    list_n_workers = np.unique(
        np.logspace(0, np.log10(max_workers), 15, dtype=int)
    )[::-1]
    list_seeds = rng.randint(MAX_INT, size=n_rep)
    strategy_names = [spec[0] for spec in strategies]
    grid = itertools.product(list_n_workers, strategy_names, list_reg,
                             list_n_times, list_seeds)

    results = []
    n_done = n_total = 0
    for n_total, setting in enumerate(grid, start=1):
        n_workers, strategy, reg, n_times, seed = setting
        problem = dict(n_workers=n_workers, strategy=strategy, reg=reg,
                       n_times=n_times)

        if collect:
            # Only keep the entries that have already been cached.
            func_id, args_id = run_one._get_output_identifiers(
                **problem, **common_args, random_state=seed)
            if not run_one.store_backend.contains_item((func_id, args_id)):
                continue

        n_done += 1
        results.append(run_one(**problem, random_state=seed, **common_args))

    # Save the results as a DataFrame
    results = pandas.DataFrame(results)
    results.to_pickle(get_save_file_name(ext='pkl'))

    if collect:
        print(f"Script: {n_done / n_total:7.2%}")
if __name__ == "__main__":
    # Command line entry point: either plot previously computed results or
    # run the scaling benchmark.
    import argparse

    parser = argparse.ArgumentParser('')
    parser.add_argument('--plot', action="store_true",
                        help='Plot the results of the benchmarl')
    parser.add_argument('--n-rep', type=int, default=5,
                        help='Number of repetition to average to compute the '
                        'average running time.')
    args = parser.parse_args()

    # Fixed seed for the random number generator of the benchmark.
    seed = 422742
    rng = check_random_state(seed)

    if args.plot:
        # NOTE(review): the plot uses n_times=150 while the benchmark default
        # is 151 -- confirm which value matches the stored results.
        list_n_times = [150, 750]
        strategies = [('greedy', 'Greedy', 's-'),
                      ('random', 'Random', "h-"),
                      ('lgcd', "LGCD", 'o-')]
        plot_scaling_1d_benchmark(strategies, list_n_times)
    else:
        strategies = [('greedy', 'Greedy', 's-'),
                      ('lgcd', "LGCD", 'o-')]
        # `run_scaling_1d_benchmark` has no `T` parameter, so the previous
        # `T=151` keyword raised a TypeError; the problem sizes now come from
        # its defaults. The number of repetitions requested on the command
        # line and the seeded generator are forwarded, the function draws
        # the per-repetition seeds itself.
        run_scaling_1d_benchmark(strategies, n_rep=args.n_rep,
                                 random_state=rng)