Пример #1
0
    def __init__(self,
                 output_dir,
                 ckpt_freq=-1,
                 exit_time=None,
                 split_by='features',
                 mode='local',
                 Ak=None,
                 Ak_test=None,
                 y_test=None,
                 verbose=1,
                 name=''):
        """Initialise the monitor state and create the output directory.

        Parameters
        ----------
        output_dir : str
            Directory of output. It is created if it does not exist yet.
        ckpt_freq : int, optional
            Frequency of the checkpoint (the default is -1).
        exit_time : float, optional
            Exit if the program has been running for `exit_time`. Any falsy
            value (the default ``None``, but also ``0``) disables this
            criterion by storing ``np.inf``.
        split_by : str, optional
            Whether the data matrix is split by 'samples' or 'features'
            (the default is 'features').
        mode : ['local', 'global', None], optional
             * `local` mode only logs duality gap of local solver.
             * `global` mode logs duality gap of the whole program. It takes
               more time to compute.
        Ak : optional
            Stored as ``self.Ak``.
            NOTE(review): presumably the local block of the data matrix.
        Ak_test, y_test : optional
            Test data and targets; prediction tests are enabled only when
            both are provided.
        verbose : int, optional
            Verbosity level, stored as ``self.verbose``.
        name : str, optional
            Label for this monitor, stored as ``self.name``.
        """
        # Identity and (optional) train/test data.
        self.name = name
        self.Ak, self.Ak_test, self.y_test = Ak, Ak_test, y_test
        # Prediction tests need both the test matrix and the test targets.
        self.do_prediction_tests = not (Ak_test is None or y_test is None)

        # Position of this process within the communicator.
        self.rank = comm.get_rank()
        self.world_size = comm.get_world_size()

        # Time accounting; a falsy exit_time means "never exit on time".
        self.running_time = 0
        self.previous_time = time.time()
        self.exit_time = exit_time if exit_time else np.inf

        self.verbose = verbose

        # Three independent record lists.
        self.records, self.records_l, self.records_g = [], [], []

        # Logging / checkpoint configuration.
        self.mode = mode
        self.ckpt_freq = ckpt_freq
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        self.model = None

        # If a problem is split by samples, the total number of data points
        # is unknown on a local node, so the division is deferred to the
        # logging time.
        self.split_by_samples = (split_by == 'samples')

        self._sigma_sum = None
Пример #2
0
def main(dataset, dataset_path, dataset_size, use_split_dataset, split_by,
         random_state, algoritmname, max_global_steps, local_iters, solvername,
         output_dir, exit_time, lambda_, l1_ratio, theta, graph_topology, c,
         logmode, ckpt_freq, n_connectivity):
    """Run one distributed optimisation experiment and save its results.

    The function initialises the MPI process group, builds the communication
    graph, loads this rank's data, configures the local solver, runs the
    selected algorithm under a ``Monitor`` and finally saves the weights and
    the log.

    Parameters
    ----------
    dataset, dataset_path, dataset_size, split_by, random_state
        Forwarded to the dataset loader.
    use_split_dataset : bool
        If truthy the data is read with ``load_dataset_by_rank``, otherwise
        with ``load_dataset``; both receive the same arguments.
    algoritmname, max_global_steps, local_iters, theta
        Forwarded to ``run_algorithm``.
    solvername, lambda_, l1_ratio, c
        Forwarded to ``configure_solver`` (``c`` is passed as ``C``).
    output_dir, exit_time, logmode, ckpt_freq
        Forwarded to ``Monitor``.
    graph_topology, n_connectivity
        Forwarded to ``define_graph_topology``.
    """
    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    # Always use this value throughout this project.
    gamma = 1.0

    # Bring up the process group, then find out who and how many we are.
    comm.init_process_group('mpi')
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Communication graph with the requested topology.
    graph = define_graph_topology(world_size, graph_topology,
                                  n_connectivity=n_connectivity)

    # Both loaders take identical arguments; only the callable differs.
    loader = load_dataset_by_rank if use_split_dataset else load_dataset
    X, y = loader(dataset, rank, world_size, dataset_size, split_by,
                  dataset_path=dataset_path, random_state=random_state)

    # Local subproblem solver.
    solver_options = dict(name=solvername,
                          split_by=split_by,
                          l1_ratio=l1_ratio,
                          lambda_=lambda_,
                          C=c,
                          random_state=random_state)
    solver = configure_solver(**solver_options)

    # Hooks that log and save metrics.
    monitor = Monitor(solver, output_dir, ckpt_freq, exit_time, split_by,
                      logmode)

    Akxk, xk = run_algorithm(algoritmname, X, y, solver, gamma, theta,
                             max_global_steps, local_iters, world_size, graph,
                             monitor)

    monitor.save(Akxk, xk, weightname='weight.npy', logname='result.csv')
Пример #3
0
def main(dataset):
    """Prepare the distributed ElasticNet experiment for ``dataset``.

    Parameters
    ----------
    dataset : str
        Either ``'inv'`` or ``'mg'``. Any other value prints
        ``'dataset not supported'`` and returns ``None`` before any process
        group is initialised.

    Notes
    -----
    NOTE(review): the function as visible here ends right after the solvers
    are configured, so several locals (``reg``, ``gamma``, ``theta``,
    ``global_iters``, ``local_iters``, ``graphs_center``, ``index``, ...)
    are not used yet. They are kept on purpose, presumably for the rest of
    the experiment; confirm against the full script.
    """
    if dataset == 'inv':
        lam_stop = 3.15
        lam = 0.01467
        reg = True
    elif dataset == 'mg':
        lam = 1e-3
        reg = True
    else:
        print('dataset not supported')
        return
    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-1
    global_iters = 500
    local_iters = 5
    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                rank,
                                                world_size,
                                                random_state=random_state,
                                                verbose=1)
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy, so the builtin is used directly (same resulting dtype).
    # NOTE(review): allow_pickle=True unpickles file content; only load
    # index files from a trusted location.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'),
                               allow_pickle=True),
                       dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path,
                                                 'index_test.npy'),
                                    allow_pickle=True),
                            dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet',
                              l1_ratio=0.8,
                              lambda_=lam / len(y),
                              random_state=random_state)
    # `lam_stop` only exists for the 'inv' dataset, hence the guard.
    # (The original used `=` here, which is a syntax error.)
    if dataset == 'inv':
        solver_stop = configure_solver(name='ElasticNet',
                                       l1_ratio=0.8,
                                       lambda_=lam_stop / len(y),
                                       random_state=random_state)
Пример #4
0
def main(dataset):
    """Fit CoLA with and without intercept on every topology and plot both.

    For each non-empty graph returned by ``getGraphs(world_size)`` the model
    is fitted twice with the same solver and hyper-parameters: once with
    ``fit_intercept=False`` (labelled "Default") and once with
    ``fit_intercept=True`` (labelled "Center"). Each run prints its test
    statistics and saves its model and log under ``out/report/<dataset>``;
    afterwards the two monitors are passed to ``make_intercept_plots``.

    Parameters
    ----------
    dataset : str
        Dataset name; used for loading the data, locating the index files
        under ``data/<dataset>/features/<world_size>`` and naming outputs.
    """
    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-3
    global_iters = 500
    local_iters = 20
    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Graphs to iterate over, keyed by topology name
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                rank,
                                                world_size,
                                                random_state=random_state,
                                                verbose=1)
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy, so the builtin is used directly (same resulting dtype).
    # NOTE(review): allow_pickle=True unpickles file content; only load
    # index files from a trusted location.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'),
                               allow_pickle=True),
                       dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path,
                                                 'index_test.npy'),
                                    allow_pickle=True),
                            dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet',
                              l1_ratio=0.8,
                              lambda_=1e-3 / len(y),
                              random_state=random_state)

    # Where models and logs are written
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()

    def run_variant(label, fit_intercept, topo, graph):
        """Fit one CoLA variant on `graph`, report and save it; return its monitor."""
        suf = f'{world_size}-{topo}'
        tag = label.lower()

        monitor = Monitor(output_dir,
                          mode='all',
                          verbose=1,
                          Ak=X,
                          Ak_test=X_test,
                          y_test=y_test,
                          name=label)
        model = Cola(gamma,
                     solver,
                     theta,
                     fit_intercept=fit_intercept,
                     normalize=True)
        monitor.init(model, graph)
        model.fit(X, y, graph, monitor, global_iters, local_iters)

        # Show test stats (the header is printed by rank 0 only)
        if rank == 0:
            print(f'{label} - {topo}')
        monitor.show_test_statistics()

        # Save final model
        monitor.save(modelname=f'model-{tag}-{suf}.pickle',
                     logname=f'result-{tag}-{suf}.csv')
        return monitor

    # Run CoLA on every topology
    for topo in graphs_center:
        # Every rank hits the barrier once per topology, before the check.
        comm.barrier()
        graph = graphs_center[topo]
        if not graph:
            continue

        mon_default = run_variant('Default', False, topo, graph)
        mon_center = run_variant('Center', True, topo, graph)

        # Compare the two runs
        make_intercept_plots(f'{dataset}_{topo}_', mon_default, mon_center,
                             None, index, index_test)
Пример #5
0
def main(dataset, dataset_path, dataset_size, datapoints, use_split_dataset,
         split_by, random_state, algoritmname, max_global_steps, local_iters,
         solvername, output_dir, exit_time, lambda_, l1_ratio, theta,
         graph_topology, c, logmode, ckpt_freq, n_connectivity, fit_intercept,
         normalize, verbose):
    """Run a CoLA experiment end to end and save the model and log.

    The function initialises the MPI process group, builds the communication
    graph, loads this rank's data, configures the local solver, fits a
    ``Cola`` model under a ``Monitor``, prints test statistics when test data
    is available and saves the final model.

    Parameters
    ----------
    dataset, dataset_size, datapoints, split_by, random_state
        Forwarded to the dataset loader.
    dataset_path : str
        Location of the data. With ``use_split_dataset`` a falsy value
        defaults to ``data/<dataset>/<split_by>/<world_size>``.
    use_split_dataset : bool
        If truthy the data (including a test split) is read with
        ``load_dataset_by_rank``; otherwise ``load_dataset`` is used, which
        yields no test split here.
    algoritmname : str
        Only ``'cola'`` is implemented. Any other value is appended to
        ``output_dir`` and then raises ``NotImplementedError``.
    max_global_steps, local_iters
        Forwarded to ``Cola.fit``.
    solvername, lambda_, l1_ratio, c
        Forwarded to ``configure_solver`` (``c`` is passed as ``C``).
    output_dir : str
        Base output directory; if ``dataset`` is truthy,
        ``<dataset>/<world_size zero-padded to 2>/<graph_topology>`` is
        appended.
    exit_time, logmode, ckpt_freq, verbose
        Forwarded to ``Monitor`` (``verbose`` also to the loaders and the
        graph builder).
    theta, fit_intercept, normalize
        Forwarded to ``Cola``.
    graph_topology, n_connectivity
        Forwarded to ``define_graph_topology``.

    Raises
    ------
    NotImplementedError
        If ``algoritmname`` is not ``'cola'``.
    """
    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graph = define_graph_topology(world_size,
                                  graph_topology,
                                  n_connectivity=n_connectivity,
                                  verbose=verbose)

    if use_split_dataset:
        if not dataset_path:
            dataset_path = os.path.join('data', dataset, split_by,
                                        f'{world_size}')
        X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                    rank,
                                                    world_size,
                                                    dataset_size,
                                                    datapoints,
                                                    split_by,
                                                    dataset_path=dataset_path,
                                                    random_state=random_state,
                                                    verbose=verbose)
    else:
        X, y = load_dataset(dataset,
                            rank,
                            world_size,
                            dataset_size,
                            datapoints,
                            split_by,
                            dataset_path=dataset_path,
                            random_state=random_state,
                            verbose=verbose)
        # This loader returns no test split. Bind the names explicitly so the
        # Monitor construction and the `X_test is not None` check below do
        # not fail with a NameError.
        X_test = y_test = None

    # Define subproblem
    solver = configure_solver(name=solvername,
                              split_by=split_by,
                              l1_ratio=l1_ratio,
                              lambda_=lambda_,
                              C=c,
                              random_state=random_state)

    # Build the output directory: <output_dir>[/<algorithm>][/<dataset>/<NN>/<topology>]
    if algoritmname != 'cola':
        output_dir = os.path.join(output_dir, algoritmname)
    if dataset:
        output_dir = os.path.join(output_dir, dataset, f'{world_size:0>2}',
                                  graph_topology)

    # Add hooks to log and save metrics.
    monitor = Monitor(output_dir,
                      ckpt_freq=ckpt_freq,
                      exit_time=exit_time,
                      split_by=split_by,
                      mode=logmode,
                      verbose=verbose,
                      Ak=X,
                      Ak_test=X_test,
                      y_test=y_test)

    # Run CoLA
    comm.barrier()
    if algoritmname == 'cola':
        model = Cola(gamma, solver, theta, fit_intercept, normalize)
        monitor.init(model, graph)
        model = model.fit(X, y, graph, monitor, max_global_steps, local_iters)
    else:
        raise NotImplementedError()

    # Show test stats
    if X_test is not None:
        monitor.show_test_statistics()

    # Save final model
    monitor.save(modelname='model.pickle', logname='result.csv')