def __init__(self, output_dir, ckpt_freq=-1, exit_time=None, split_by='features', mode='local',
             Ak=None, Ak_test=None, y_test=None, verbose=1, name=''):
    """Set up the monitor's bookkeeping state and create the output directory.

    Parameters
    ----------
    output_dir : str
        Directory of output. It is created here if it does not exist yet.
    ckpt_freq : int, optional
        Frequency of the checkpoint (the default is -1). Stored as given.
    exit_time : float, optional
        Exit if the program has been running for `exit_time`. Any falsy value
        (``None`` or ``0``) is replaced by ``np.inf``, which disables the criterion.
    split_by : str, optional
        Whether the data matrix is split by 'samples' or 'features'
        (the default is 'features'). Only equality with 'samples' is recorded.
    mode : str or None, optional
        Logging mode (the default is 'local'). The value is stored without
        validation; the original documentation describes `local` as logging the
        duality gap of the local solver and `global` as logging the duality gap
        of the whole program (slower to compute).
    Ak : optional
        Local data block; stored as given.
    Ak_test, y_test : optional
        Held-out data and targets. Prediction tests are enabled only when both
        are not None.
    verbose : int, optional
        Verbosity level (the default is 1). Stored as given.
    name : str, optional
        Label for this monitor (the default is '').
    """
    # Identification and data handed in by the caller.
    self.name = name
    self.Ak = Ak
    self.Ak_test = Ak_test
    self.y_test = y_test
    missing_test_data = Ak_test is None or y_test is None
    self.do_prediction_tests = not missing_test_data

    # Position of this process within the communication group.
    self.rank = comm.get_rank()
    self.world_size = comm.get_world_size()

    # Wall-clock accounting used for the exit criterion.
    self.running_time = 0
    self.previous_time = time.time()
    self.exit_time = exit_time if exit_time else np.inf

    # Logging configuration and the record buffers.
    self.verbose = verbose
    self.records = []
    self.records_l = []
    self.records_g = []
    self.mode = mode
    self.ckpt_freq = ckpt_freq

    self.output_dir = output_dir
    os.makedirs(self.output_dir, exist_ok=True)

    # The model is not known at construction time.
    self.model = None

    # If a problem is split by samples, then the total number of data points is unknown
    # in a local node. As a result, we will defer the division to the logging time.
    self.split_by_samples = (split_by == 'samples')
    self._sigma_sum = None
def main(dataset, dataset_path, dataset_size, use_split_dataset, split_by, random_state,
         algoritmname, max_global_steps, local_iters, solvername, output_dir, exit_time,
         lambda_, l1_ratio, theta, graph_topology, c, logmode, ckpt_freq, n_connectivity):
    """Run one distributed optimization job and save the resulting weights and log.

    The function initializes the MPI process group, builds the communication
    graph, loads this rank's data, configures the subproblem solver, runs the
    selected algorithm with a monitor attached and finally saves the result as
    'weight.npy' / 'result.csv' through the monitor.
    """
    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Join the process group and find out who we are.
    comm.init_process_group('mpi')
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Communication graph with the requested topology.
    topology = define_graph_topology(world_size, graph_topology, n_connectivity=n_connectivity)

    # Both loaders take the same arguments; only the loader itself differs.
    loader = load_dataset_by_rank if use_split_dataset else load_dataset
    X, y = loader(dataset, rank, world_size, dataset_size, split_by,
                  dataset_path=dataset_path, random_state=random_state)

    # Subproblem solver.
    solver = configure_solver(name=solvername, split_by=split_by, l1_ratio=l1_ratio,
                              lambda_=lambda_, C=c, random_state=random_state)

    # Hooks to log and save metrics.
    # NOTE(review): this passes `solver` as the first positional argument; confirm it
    # matches the Monitor constructor in use. A constructor whose first parameter is
    # `output_dir` would receive every argument shifted by one position.
    monitor = Monitor(solver, output_dir, ckpt_freq, exit_time, split_by, logmode)

    Akxk, xk = run_algorithm(algoritmname, X, y, solver, gamma, theta, max_global_steps,
                             local_iters, world_size, topology, monitor)

    monitor.save(Akxk, xk, weightname='weight.npy', logname='result.csv')
def main(dataset):
    """Prepare an ElasticNet experiment on the 'inv' or 'mg' dataset.

    Parameters
    ----------
    dataset : str
        Either 'inv' or 'mg'. Any other value prints 'dataset not supported'
        and returns without touching the process group.

    Notes
    -----
    For 'inv' a second solver is configured with the larger regularization
    value `lam_stop`. Both solvers use `lambda_ = value / len(y)`.
    """
    if dataset == 'inv':
        lam_stop = 3.15
        lam = 0.01467
        reg = True
    elif dataset == 'mg':
        lam = 1e-3
        reg = True
    else:
        print('dataset not supported')
        return

    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-1
    global_iters = 500
    local_iters = 5

    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size,
                                                random_state=random_state, verbose=1)

    # `np.int` was only an alias of the builtin `int` and has been removed from
    # NumPy; the builtin gives the same dtype.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True),
                       dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True),
                            dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam / len(y),
                              random_state=random_state)
    # `lam_stop` only exists for 'inv', so the second solver is guarded the same way.
    if dataset == 'inv':
        solver_stop = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam_stop / len(y),
                                       random_state=random_state)

    # NOTE(review): several locals (reg, gamma, theta, global_iters, local_iters,
    # graphs_center, X, X_test, y_test, index, index_test, solver, solver_stop) are not
    # consumed in the function as shown; they are kept in case the body continues elsewhere.
def main(dataset):
    """Compare CoLA with and without an intercept on every available topology.

    For each non-empty graph returned by `getGraphs`, two ElasticNet CoLA models
    are fitted on this rank's data: 'Default' (``fit_intercept=False``) and
    'Center' (``fit_intercept=True``). Each run is monitored, its test
    statistics are shown, and its model and log are saved under
    ``out/report/<dataset>``. The two monitors are then passed to
    `make_intercept_plots`.

    Parameters
    ----------
    dataset : str
        Dataset name; also used to locate ``data/<dataset>/features/<world_size>``.
    """
    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-3
    global_iters = 500
    local_iters = 20

    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size,
                                                random_state=random_state, verbose=1)

    # `np.int` was only an alias of the builtin `int` and has been removed from
    # NumPy; the builtin gives the same dtype.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True),
                       dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True),
                            dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=1e-3 / len(y),
                              random_state=random_state)

    # Add hooks to log and save metrics.
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()

    # (monitor name, fit_intercept) for the two variants, in execution order.
    variants = (('Default', False), ('Center', True))

    # Run CoLA
    for topo in graphs_center:
        # Every rank reaches the barrier, including for topologies that are skipped.
        comm.barrier()
        graph = graphs_center[topo]
        if not graph:
            continue

        suf = f'{world_size}-{topo}'
        monitors = {}
        for label, fit_intercept in variants:
            tag = label.lower()
            monitor = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test,
                              y_test=y_test, name=label)
            model = Cola(gamma, solver, theta, fit_intercept=fit_intercept, normalize=True)
            monitor.init(model, graph)
            model = model.fit(X, y, graph, monitor, global_iters, local_iters)

            # Show test stats
            if rank == 0:
                print(f'{label} - {topo}')
            # NOTE(review): assumed to run on every rank (only the header line is
            # restricted to rank 0) — confirm against the original layout.
            monitor.show_test_statistics()

            # Save final model
            monitor.save(modelname=f'model-{tag}-{suf}.pickle', logname=f'result-{tag}-{suf}.csv')
            monitors[label] = monitor

        # NOTE(review): assumed to run once per topology, inside the loop — confirm.
        make_intercept_plots(f'{dataset}_{topo}_', monitors['Default'], monitors['Center'],
                             None, index, index_test)
def main(dataset, dataset_path, dataset_size, datapoints, use_split_dataset, split_by, random_state,
         algoritmname, max_global_steps, local_iters, solvername, output_dir, exit_time,
         lambda_, l1_ratio, theta, graph_topology, c, logmode, ckpt_freq, n_connectivity,
         fit_intercept, normalize, verbose):
    """Fit a CoLA model on this rank's data and save the model and log.

    The function initializes the MPI process group, builds the communication
    graph, loads the data, configures the subproblem solver, fits the model
    with a monitor attached, shows test statistics when test data is
    available and saves 'model.pickle' / 'result.csv' through the monitor.

    Notes
    -----
    * With `use_split_dataset` and an empty `dataset_path`, the path defaults to
      ``data/<dataset>/<split_by>/<world_size>``.
    * Only the pre-split loader returns test data. Otherwise `X_test` and
      `y_test` are ``None`` and the test statistics are skipped.
    * The output directory is extended with the algorithm name (unless it is
      'cola') and, when `dataset` is truthy, with
      ``<dataset>/<world_size zero-padded to 2>/<graph_topology>``.

    Raises
    ------
    NotImplementedError
        If `algoritmname` is anything other than 'cola'.
    """
    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graph = define_graph_topology(world_size, graph_topology, n_connectivity=n_connectivity,
                                  verbose=verbose)

    if use_split_dataset:
        if not dataset_path:
            dataset_path = os.path.join('data', dataset, split_by, f'{world_size}')
        X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, dataset_size,
                                                    datapoints, split_by,
                                                    dataset_path=dataset_path,
                                                    random_state=random_state, verbose=verbose)
    else:
        X, y = load_dataset(dataset, rank, world_size, dataset_size, datapoints, split_by,
                            dataset_path=dataset_path, random_state=random_state,
                            verbose=verbose)
        # This loader yields no test split; bind the names so the monitor setup
        # and the `X_test is not None` check below do not raise NameError.
        X_test = None
        y_test = None

    # Define subproblem
    solver = configure_solver(name=solvername, split_by=split_by, l1_ratio=l1_ratio,
                              lambda_=lambda_, C=c, random_state=random_state)

    # Add hooks to log and save metrics.
    if algoritmname != 'cola':
        output_dir = os.path.join(output_dir, algoritmname)
    if dataset:
        output_dir = os.path.join(output_dir, dataset, f'{world_size:0>2}', graph_topology)
    monitor = Monitor(output_dir, ckpt_freq=ckpt_freq, exit_time=exit_time, split_by=split_by,
                      mode=logmode, verbose=verbose, Ak=X, Ak_test=X_test, y_test=y_test)

    # Run CoLA
    comm.barrier()
    if algoritmname == 'cola':
        model = Cola(gamma, solver, theta, fit_intercept, normalize)
        monitor.init(model, graph)
        model = model.fit(X, y, graph, monitor, max_global_steps, local_iters)
    else:
        raise NotImplementedError()

    # Show test stats
    if X_test is not None:
        monitor.show_test_statistics()

    # Save final model
    monitor.save(modelname='model.pickle', logname='result.csv')