Пример #1
0
    def __init__(self,
                 root,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=False):
        """FEMNIST dataset wrapper.

        Args:
            root: dataset root directory handed to the parent class.
            train: if True load the training split, else the test split.
            transform: optional transform applied to each sample.
            target_transform: optional transform applied to each label.
            download: if True, download the dataset before the existence check.

        Raises:
            RuntimeError: if the dataset files are not found on disk.
        """
        super(FEMNIST, self).__init__(root,
                                      transform=transform,
                                      target_transform=target_transform)
        self.train = train  # training set or test set

        if download:
            self.download()

        if not self._check_exists():
            raise RuntimeError('Dataset not found.' +
                               ' You can use download=True to download it')

        train_data_dir = os.path.join('..', 'data', 'femnist', 'FEMNIST',
                                      'train')
        test_data_dir = os.path.join('..', 'data', 'femnist', 'FEMNIST',
                                     'test')
        self.dict_users = {}

        # Both branches of the original passed identical directories and only
        # differed in the flag, so the split selection is folded into the flag.
        self.users, groups, self.data = read_data(train_data_dir,
                                                  test_data_dir,
                                                  train_flag=self.train)

        # Label remap kept for the disabled 47-class merge below.
        class_names_map = [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
            36, 37, 12, 38, 39, 40, 41, 42, 18, 19, 20, 21, 22, 43, 24, 25, 44,
            45, 28, 46, 30, 31, 32, 33, 34, 35
        ]
        # TODO: automate this
        if False:  # 47 classes; remapping deliberately disabled, kept as-is
            for user in self.users:
                labels = self.data[user]['y']
                for j, label in enumerate(labels):
                    labels[j] = class_names_map[label]

        # dict_users: user index -> set of global sample indices owned by
        # that user.  Using range() also handles a user with zero samples,
        # where the original lst[-1] would raise IndexError.
        counter = 0
        for i, user in enumerate(self.users):
            num_samples = len(self.data[user]['y'])
            self.dict_users[i] = set(range(counter, counter + num_samples))
            counter += num_samples

        # dict_index: global sample index -> [user index, local index].
        self.dict_index = {}
        length_data = 0
        for i, user in enumerate(self.users):
            for j in range(len(self.data[user]['y'])):
                self.dict_index[length_data] = [i, j]
                length_data += 1
        self.length_data = length_data
Пример #2
0
    def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L,
                 num_glob_iters, local_epochs, users_per_round, similarity,
                 noise, times):
        """Create the SCAFFOLD server and one UserSCAFFOLD per client."""
        super().__init__(dataset, algorithm, model[0], batch_size,
                         learning_rate, L, num_glob_iters, local_epochs,
                         users_per_round, similarity, noise, times)
        self.control_norms = []

        # Build one user object per client found in the dataset.
        data = read_data(dataset)
        total_users = len(data[0])
        for user_idx in range(total_users):
            user_id, train_set, test_set = read_user_data(user_idx, data,
                                                          dataset)
            new_user = UserSCAFFOLD(user_id, train_set, test_set, model,
                                    batch_size, learning_rate, L, local_epochs)
            self.users.append(new_user)
            self.total_train_samples += new_user.train_samples

        if self.noise:
            # Channel threshold h_min from the Rayleigh quantile function.
            self.communication_thresh = rayleigh.ppf(
                1 - users_per_round / total_users)

        print("Number of users / total users:", users_per_round, " / ",
              total_users)

        # One zero control variate per trainable model parameter.
        self.server_controls = [
            torch.zeros_like(param.data)
            for param in self.model.parameters() if param.requires_grad
        ]

        print("Finished creating SCAFFOLD server.")
Пример #3
0
def setup_clients(dataset, model=None):
    """Instantiates clients based on given train and test data directories.

    Stops after the first NumbOfUsers clients have been created.

    Return:
        all_clients: list of Client objects.
    """
    train_data_dir = os.path.join('..', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', dataset, 'data', 'test')

    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)
    if not groups:
        groups = [[] for _ in users]

    # Instantiate only the first NumbOfUsers clients (module-level limit).
    all_clients = []
    for n, (u, g) in enumerate(zip(users, groups)):
        all_clients.append(Client(u, g, train_data[u], test_data[u], model))
        print("User", n, " Client:", u, " ->  Train data size:",
              len(train_data[u]['y']), "samples")
        if n + 1 == NumbOfUsers:
            break

    return all_clients
Пример #4
0
def setup_client_servers(dataset,
                         seed,
                         params,
                         sketcher,
                         model_cls,
                         use_val_set=False,
                         num_client_servers=1):
    """Instantiates clients based on given train and test data directories.

    Return:
        all_clients: list of Client objects.
    """
    eval_set = 'val' if use_val_set else 'test'
    # DATA_PATH is a list of path components; join it with the split name.
    train_data_dir = os.path.join(*(DATA_PATH + ['data', dataset, 'train']))
    test_data_dir = os.path.join(*(DATA_PATH + ['data', dataset, eval_set]))

    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)

    # Partition the clients across the requested number of client servers.
    return create_client_servers(seed, params, users, groups, train_data,
                                 test_data, sketcher, model_cls,
                                 num_client_servers)
Пример #5
0
    def setup_clients(self, num_setup):
        """Instantiates clients based on given train and test data directories.

            Return:
                all_clients: list of Client objects.
            """
        eval_set = 'test'
        train_data_dir = os.path.join('..', 'data', self.dataset, 'data',
                                      'train')
        test_data_dir = os.path.join('..', 'data', self.dataset, 'data',
                                     eval_set)

        users, groups, train_data, test_data = read_data(
            train_data_dir, test_data_dir)
        # Keep only a num_setup fraction of the users.
        # NOTE(review): the +1 makes this take n+1 users, one more than the
        # fraction suggests — confirm whether the off-by-one is intentional.
        n = int(len(users) * num_setup)
        users = users[:n + 1]

        if len(groups) == 0:
            groups = [[] for _ in users]

        # Create models
        # NOTE(review): model_params is computed (optionally overriding the
        # learning rate from args) but never used below — presumably consumed
        # elsewhere or dead code; verify before removing.
        model_params = MODEL_PARAMS[self.model_path]
        if self.args.lr != -1:
            model_params_list = list(model_params)
            model_params_list[0] = self.args.lr
            model_params = tuple(model_params_list)

        # Each client gets a distinct but deterministic seed (base seed + i).
        for i, u in enumerate(users):
            # client = Client(u, train_data[u], test_data[u], self.seed )
            client = Client(u, train_data[u], test_data[u], self.seed + i)
            self.clients.append(client)

        return self.clients
Пример #6
0
    def __init__(self, experiment, device, dataset, algorithm, model, batch_size, learning_rate, alpha, eta, L, num_glob_iters,
                 local_epochs, optimizer, num_edges, times):
        """Create the server and one edge object per client for `algorithm`.

        Raises:
            ValueError: if `algorithm` is not one of the supported names
                (the original chain of ifs raised NameError instead).
        """
        super().__init__(experiment, device, dataset, algorithm, model[0], batch_size, learning_rate, alpha, eta, L, num_glob_iters,
                         local_epochs, optimizer, num_edges, times)

        # Initialize data for all edges
        data = read_data(dataset, read_optimal_weights=False)

        self.optimal_weights = None
        self.optimal_loss_unreg = None  # Unregularized loss
        self.optimal_loss_reg = None    # Regularized loss with parameter L
        if data[-1] is not None:
            # Synthetic dataset: save the optimal weights for comparison later
            self.optimal_weights = data[-2]
            self.optimal_loss_unreg = data[-1]
            # NOTE(review): the regularization term is taken over data[-1]
            # (the loss), not data[-2] (the weights) — preserved from the
            # original; confirm which was intended.
            self.optimal_loss_reg = (self.L / 2) * (np.linalg.norm(data[-1]) ** 2)

        total_edges = len(data[0])

        # Dispatch table replaces the long chain of `if algorithm == ...`
        # branches that all constructed an edge with identical arguments.
        edge_classes = {
            "DONE": edgeDONE,
            "FirstOrder": edgeFiOrder,
            "DANE": edgeDANE,
            "New": edgeNew,
            "GD": edgeGD,
            "FedAvg": edgeAvg,
            "FEDL": edgeFEDL,
            "Newton": edgeNewton,
            "GT": edgeGT,
            "PGT": edgePGT,
        }
        try:
            edge_cls = edge_classes[algorithm]
        except KeyError:
            raise ValueError("Unknown algorithm: {}".format(algorithm))

        for i in range(total_edges):
            id, train, test = read_edge_data(i, data, dataset)
            edge = edge_cls(device, id, train, test, model, batch_size, learning_rate, alpha, eta, L, local_epochs, optimizer)
            self.edges.append(edge)
            self.total_train_samples += edge.train_samples

        print("Number of edges / total edges:", num_edges, " / ", total_edges)
Пример #7
0
def setup_clients(dataset, model=None, use_val_set=None):
    """Load the train and eval splits for `dataset` and build its clients."""
    eval_set = "val" if use_val_set else "test"
    train_data_dir = os.path.join("data", dataset, "data", "train")
    eval_data_dir = os.path.join("data", dataset, "data", eval_set)
    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     eval_data_dir)
    return create_clients(users, groups, train_data, test_data, model)
Пример #8
0
def main():
    """Parse arguments, load the federated data and run the trainer."""
    args = parse_args()

    data_root = os.path.join('..', 'data', args.dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))

    trainer = MlheadTrainer(args, users, groups, train_data, test_data)
    trainer.train(args)
    trainer.finish(args)
def create_clients(model, args):
    """Build one Client per user found in the dataset on disk."""
    data_root = os.path.join('..', 'data', args.dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    if not groups:
        # No group information: give every user an empty group.
        groups = [[] for _ in users]
    return [
        Client(user, group, train_data[user], test_data[user], model)
        for user, group in zip(users, groups)
    ]
Пример #10
0
def main():
    """Tune learning rates across several seeds and write results to CSV."""
    # Read in the command line options
    options = read_options()
    # Generate the search space for learning rates
    search = get_search_space(**options)
    # Generate the random seeds to test
    seeds = range(options['seed'], options['seed'] + options['num_seeds'])
    # Read in the data
    train_path = os.path.join('..', 'data', options['dataset'],
                              options['dataloc'], 'train.json')
    test_path = os.path.join('..', 'data', options['dataset'],
                             options['dataloc'], 'test.json')
    print('Reading data...')
    dataset = read_data(train_path, test_path)
    print('Reading data done.')
    # Import model
    if options['dataset'].startswith('synthetic'):
        model_path = '%s.%s.%s' % ('models', 'synthetic', options['model'])
    else:
        model_path = '%s.%s.%s' % ('models', options['dataset'],
                                   options['model'])
    # Import server
    server_path = 'trainers.{}'.format(options['optimizer'])
    # Try every (learning rate, seed) combination
    args = list(itertools.product(search, seeds))
    results = parallel_execute(args, dataset, options, model_path, server_path)
    losses = reduce_results(args, results)
    results_reduced = list(zip(search, losses))
    print(results_reduced)
    best_idx = np.argmin(losses)  # compute once instead of twice
    print('Best result: {} (index {})'.format(results_reduced[best_idx],
                                              best_idx))
    # Make output directory; exist_ok avoids failing if it already exists
    os.makedirs('results', exist_ok=True)
    note = '_' + options['note'] if len(options['note']) > 0 else ''

    def get_aux_param(options):
        # Optimizer-specific auxiliary hyper-parameter used in the file name.
        optim = options['optimizer']
        if optim == 'fedio':
            return options['rho']
        elif optim == 'fedprox':
            return options['mu']
        return 0

    out_file = 'results/tune_{}_{}_{}_{}_{:.3f}_{}_{}_{}_{}_{}{}.csv'.format(
        options['optimizer'], options['dataset'], options['dataloc'],
        options['model'], get_aux_param(options), options['num_rounds'],
        options['clients_per_round'], options['num_iters'], options['seed'],
        options['num_seeds'], note)
    pd.DataFrame(results_reduced,
                 columns=['lr', 'loss']).set_index('lr').to_csv(out_file)
Пример #11
0
def setup_clients(dataset,
                  model=None,
                  use_val_set=False,
                  poison_fraction=0,
                  poison_type=PoisonType.NONE):
    """Instantiates clients based on given train and test data directories.

    Optionally poisons a fraction of the clients.  For LABELFLIP every
    sample of a poisoned client becomes an input of class FLIP_FROM_CLASS
    labelled FLIP_TO_CLASS.

    Return:
        clients: list of Client objects.
        malicious_clients: ids of the poisoned clients.
    """
    eval_set = 'test' if not use_val_set else 'val'
    train_data_dir = os.path.join('leaf', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('leaf', 'data', dataset, 'data', eval_set)

    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)

    clients = create_clients(users, groups, train_data, test_data, model)

    num_malicious_clients = math.floor(len(clients) * poison_fraction)
    malicious_clients = [
        client.id for client in clients[:num_malicious_clients]
    ]
    if poison_type == PoisonType.LABELFLIP:

        def samples_of_flip_class(client):
            # All inputs of this client whose label is FLIP_FROM_CLASS.
            return [
                client.train_data['x'][i]
                for i in range(len(client.train_data['y']))
                if client.train_data['y'][i] == FLIP_FROM_CLASS
            ]

        # Reference flipping data in case a malicious client possesses no
        # data of class FLIP_FROM_CLASS.  The original computed the same
        # comprehension twice per candidate client; compute it once.
        reference_data = []
        for client in clients:
            candidates = samples_of_flip_class(client)
            if len(candidates) > 10:
                reference_data = candidates
                break
        for client in clients[:num_malicious_clients]:
            # flip labels
            client_label_counter = len(client.train_data['y'])
            flip_data = samples_of_flip_class(client)
            if len(flip_data) == 0:
                flip_data = reference_data
            # NOTE(review): if no client holds >10 samples of
            # FLIP_FROM_CLASS, reference_data stays empty and the division
            # below raises ZeroDivisionError — preserved from the original.
            client.train_data['x'] = (flip_data * math.ceil(
                client_label_counter / len(flip_data)))[:client_label_counter]
            client.train_data['y'] = [FLIP_TO_CLASS] * client_label_counter

    return clients, malicious_clients
Пример #12
0
def setup_clients(dataset, model=None):
    """Instantiates clients based on given train and test data directories.

    Return:
        all_clients: list of Client objects.
    """
    data_root = os.path.join('..', 'data', dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    if not groups:
        # No group information: give every user an empty group.
        groups = [[] for _ in users]
    return [
        Client(user, group, train_data[user], test_data[user], model)
        for user, group in zip(users, groups)
    ]
Пример #13
0
def setup_clients(dataset, model=None):
    """Instantiates clients based on given train and test data directories.

    Return:
        all_clients: list of Client objects.
    """
    base_dir = os.path.join('..', 'data', dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(base_dir, 'train'), os.path.join(base_dir, 'test'))
    return create_clients(users, groups, train_data, test_data, model)
Пример #14
0
    def __init__(self, dataset, algorithm, model, batch_size, learning_rate, beta, lamda, num_glob_iters,
                 local_epochs, optimizer, num_users, times):
        """Create the Per-Avg server and one UserPerAvg per client."""
        super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, beta, lamda, num_glob_iters,
                         local_epochs, optimizer, num_users, times)

        # Build one user object per client found in the dataset.
        data = read_data(dataset)
        total_users = len(data[0])
        for idx in range(total_users):
            user_id, train_set, test_set = read_user_data(idx, data, dataset)
            new_user = UserPerAvg(user_id, train_set, test_set, model,
                                  batch_size, learning_rate, beta, lamda,
                                  local_epochs, optimizer, total_users,
                                  num_users)
            self.users.append(new_user)
            self.total_train_samples += new_user.train_samples
        print("Number of users / total users:", num_users, " / ", total_users)
        print("Finished creating Local Per-Avg.")
Пример #15
0
def main():
    """Entry point: read options and data, then run the chosen server."""
    # Parse command line arguments
    options, model, server = read_options()

    # Read data
    data_root = os.path.join('data', options['dataset'], 'orig')
    print('Reading data...')
    dataset = read_data(os.path.join(data_root, 'train.json'),
                        os.path.join(data_root, 'test.json'))
    print('Reading data done.')

    optim = server(options, model, dataset)
    optim.run()
    optim.save()
Пример #16
0
def setup_clients(cfg, model=None, use_val_set=False):
    """Instantiates clients based on given train and test data directories.

    Return:
        all_clients: list of Client objects.
    """
    eval_set = 'val' if use_val_set else 'test'
    train_data_dir = os.path.join('..', 'data', cfg.dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', cfg.dataset, 'data', eval_set)

    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)
    return create_clients(users, groups, train_data, test_data, model, cfg)
def main():
    """Run the configured federated experiment once per random seed."""
    args = parse_job_args()
    config = read_yamlconfig(args)

    train_data_dir = os.path.join('..', 'data', args.dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', args.dataset, 'data', 'test')
    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)

    exp_seeds = config["exp-seeds"]
    book_keep = [0.] * len(exp_seeds)

    # Dispatch table replaces the long if/elif chain of identical
    # create/begins/ends sequences.
    trainer_classes = {
        'fedavg': Fedavg_Trainer,
        'fedprox': Fedprox_Trainer,
        'fedsgd': Fedsgd_Trainer,
        'fedbayes': Fedbayes_Sing_Trainer,
        'modelsaver': Model_Saver,
        'fedsem': Fedsem_Trainer,
    }

    for j, rnd_sed in enumerate(exp_seeds):
        config["seed"] = rnd_sed
        if args.experiment == 'fedcluster':
            # Not implemented.  Bug fix: the original fell through to
            # `book_keep[j] = metric` with `metric` possibly undefined
            # (NameError on the first seed); skip the bookkeeping instead.
            continue
        trainer_cls = trainer_classes.get(args.experiment)
        if trainer_cls is None:
            print("Applications not defined. Please check configs directory if the name is right.")
            break
        trainer = trainer_cls(users, groups, train_data, test_data)
        metric = trainer.begins(config, args)
        trainer.ends()
        book_keep[j] = metric

    finals = np.array(book_keep) * 100
    print(finals)
    # Bug fix: the message reports "std" but the original passed np.var.
    print("{} runs - std: {}, med: {}".format(len(exp_seeds),
                                              np.std(finals),
                                              np.median(finals)))
Пример #18
0
def setup_clients(aggregation, e, env, dataset, model=None):
    """Instantiates clients based on given train and test data directories.

    Return:
        all_clients: list of Client objects.
    """
    train_data_dir = os.path.join('..', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', dataset, 'data', 'test')

    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)
    # Bug fix: replaced a leftover debug print (a random numeric marker)
    # with an informative message.
    print("Loaded", len(users), "users")

    clients = create_clients(aggregation, e, env, users, groups, train_data,
                             test_data, model)

    return clients
Пример #19
0
def setup_clients(dataset, model=None, use_val_set=False):
    """Load train, test data and instantiate clients.

    Args:
        dataset: name of the dataset directory under data/.
        model: the shared ClientModel object for all clients.
        use_val_set: if True, evaluate on the validation split.
    Returns:
        clients: list of all client objects.
    """
    eval_set = "val" if use_val_set else "test"
    data_root = os.path.join("data", dataset, "data")

    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, "train"), os.path.join(data_root, eval_set))

    return create_clients(users, groups, train_data, test_data, model)
Пример #20
0
def main():
    """Train a single hard-coded client against a stored tangle."""
    args = parse_args()

    # Hard-coded selection (previously read from sys.argv, now fixed).
    client_id = 'f0044_12'
    tangle_name = 120

    data_root = os.path.join('leaf', 'data', args.dataset, 'data')
    train_data_dir = os.path.join(data_root, 'train_sm')
    test_data_dir = os.path.join(data_root, 'test_sm')

    print("Loading data...")
    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)
    print("Loading data... complete")

    print(
        train_single(client_id, None, 1, 0, train_data[client_id],
                     test_data[client_id], tangle_name))
Пример #21
0
def setup_clients(dataset, data_dir, model=None, validation=False, corruption=None,
                  fraction_corrupt=0.1, seed=-1, subsample=True):
    """Instantiates clients based on given train and test data directories.
        If validation is True, use part of training set as validation set

    Return:
        all_clients: list of Client objects.
    """
    train_data_dir = os.path.join(data_dir, dataset, 'data', 'train')
    test_data_dir = os.path.join(data_dir, dataset, 'data', 'test')

    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)

    # subsample
    if subsample and dataset == 'femnist':
        # Pick 1000 fixed users for experiment
        # Fixed seed (25) so the same 1000 users are chosen on every run.
        # NOTE(review): `u in users` is an O(n) list scan per entry; a set
        # of users would make this filtering linear overall.
        rng_sub = random.Random(25)
        users = rng_sub.sample(users, 1000)
        train_data = {u: p for (u, p) in train_data.items() if u in users}
        test_data = {u: p for (u, p) in test_data.items() if u in users}
        # note: groups are empty for femnist

    if validation:  # split training set into train and val in the ratio 80:20
        print('Validation mode, splitting train data into train and val sets...')
        for idx, u in enumerate(users):
            # Deterministic per-user shuffle: the user's index seeds the RNG.
            data = list(zip(train_data[u]['x'], train_data[u]['y']))
            rng = random.Random(idx)
            rng.shuffle(data)
            split_point = int(0.8 * len(data))
            # NOTE(review): if a user has fewer than 2 samples the train side
            # of the split is empty and zip(*...) raises — assumes every user
            # has enough data; confirm upstream guarantees.
            x, y = zip(*data[:split_point])
            x1, y1 = zip(*data[split_point:])
            train_data[u] = {'x': list(x), 'y': list(y)}
            # The held-out 20% replaces this user's test set.
            test_data[u] = {'x': list(x1), 'y': list(y1)}
    if len(groups) == 0:
        groups = [[] for _ in users]
    all_clients = [Client(u, g, train_data[u], test_data[u], model) for u, g in zip(users, groups)]
    # Apply the requested corruption to a fraction of the clients.
    corrupted_clients = apply_corruption_all(all_clients, dataset, corruption, fraction_corrupt, seed)
    return all_clients, corrupted_clients
Пример #22
0
    def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L,
                 num_glob_iters, local_epochs, users_per_round, similarity,
                 noise, times):
        """Create the FedAvg server and one UserAVG per client."""
        super().__init__(dataset, algorithm, model[0], batch_size,
                         learning_rate, L, num_glob_iters, local_epochs,
                         users_per_round, similarity, noise, times)

        # Build one user object per client found in the dataset.
        data = read_data(dataset)
        total_users = len(data[0])
        for user_idx in range(total_users):
            user_id, train_set, test_set = read_user_data(user_idx, data,
                                                          dataset)
            new_user = UserAVG(user_id, train_set, test_set, model,
                               batch_size, learning_rate, L, local_epochs)
            self.users.append(new_user)
            self.total_train_samples += new_user.train_samples

        if self.noise:
            # Channel threshold h_min from the Rayleigh quantile function.
            self.communication_thresh = rayleigh.ppf(
                1 - users_per_round / total_users)

        print("Number of users / total users:", users_per_round, " / ",
              total_users)
        print("Finished creating FedAvg server.")
Пример #23
0
def main(dataset, num_users, user_labels, niid):
    """Build/load the federated data split for `dataset` via read_data."""
    read_data(dataset, niid, num_users, user_labels)
Пример #24
0
 def __init__(self, dataset, algorithm, model, async_process, batch_size,
              learning_rate, lamda, beta, num_glob_iters, local_epochs,
              optimizer, num_users, user_labels, niid, times, data_load,
              extra):
     """Build the user objects and the central server for `algorithm`.

     The first `self.times` users are created with a staggered extra
     argument (i + 9); the remaining users are created without it.

     Raises:
         ValueError: if `algorithm` is not a supported name (the original
             if-chains raised NameError instead).
     """
     self.dataset = dataset
     self.model = copy.deepcopy(model)
     self.algorithm = algorithm
     self.optimizer = optimizer
     self.batch_size = batch_size
     self.learning_rate = learning_rate
     self.async_process = async_process
     self.lamda = lamda
     self.beta = beta
     # NOTE(review): hard-coded to 8, ignoring the `times` parameter —
     # preserved from the original; confirm whether this is intentional.
     self.times = 8
     self.data_load = data_load
     self.extra = extra
     self.num_users = num_users
     self.num_glob_iters = num_glob_iters
     self.local_epochs = local_epochs
     self.user_labels = user_labels
     self.niid = niid
     self.users = []
     self.local_acc = []
     self.avg_local_acc = []
     self.avg_local_train_acc = []
     self.avg_local_train_loss = []
     self.server_acc = []
     # old data split
     data = read_data(dataset, niid, num_users, user_labels)
     self.num_users = num_users

     # Dispatch tables replace the repeated per-algorithm if-chains.
     user_classes = {
         'FedAvg': UserFedAvg,
         'ASO': UserASO,
         'LGP': UserLGP,
         'PerFed': UserPerFed,
         'FedAsync': UserFedAsync,
     }
     server_classes = {
         'FedAvg': ServerFedAvg,
         'PerFed': ServerPerFed,
         'ASO': ServerASO,
         'LGP': ServerLGP,
         'FedAsync': serverFedAsync,
     }
     if algorithm not in user_classes:
         raise ValueError('Unknown algorithm: {}'.format(algorithm))
     user_cls = user_classes[algorithm]

     test_data = []
     # First batch of users: pass the staggered trailing argument i + 9.
     for i in range(self.times):
         id, train, test = read_user_data(i, data, dataset)
         user = user_cls(id, train, test, model, async_process, batch_size,
                         learning_rate, lamda, beta, local_epochs, optimizer,
                         data_load, i + 9)
         self.users.append(user)
         test_data.extend(test)
     # Remaining users: created without the trailing argument.
     for i in range(self.times, self.num_users):
         id, train, test = read_user_data(i, data, dataset)
         user = user_cls(id, train, test, model, async_process, batch_size,
                         learning_rate, lamda, beta, local_epochs, optimizer,
                         data_load)
         self.users.append(user)
         test_data.extend(test)

     self.server = server_classes[algorithm](algorithm, model, async_process,
                                             test_data, batch_size)
     # Register every user (and its sample count) with the server.
     for user in self.users:
         self.server.append_user(user.id, user.train_data_samples)
Пример #25
0
def main(experiment, dataset, algorithm, model, batch_size, learning_rate,
         beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, K,
         personal_learning_rate, times, commet, gpu, cutoff):
    """Run `times` independent federated-learning experiments, then average.

    Args:
        experiment: experiment logger; only used when `commet` is truthy
            (a descriptive run name is set on it).
        dataset: dataset name ("Mnist", "EMNIST", "Cifar10", "Synthetic",
            "human_activity", "gleam", "vehicle_sensor", ...).
        algorithm: one of "FedAvg", "PerAvg", "FedU", "pFedMe", "Local",
            "Global"; any other value prints a message and aborts.
        model: model-architecture name: "mclr", "dnn" or "cnn".
        gpu: CUDA device index, or -1 to force CPU.
        times: number of independent runs to execute and average over.
    The remaining parameters are hyper-parameters forwarded unchanged to the
    chosen server/trainer and to `average_data`.
    """

    # Get device status: Check GPU or CPU
    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available()
                          and gpu != -1 else "cpu")

    data = read_data(dataset), dataset

    # BUGFIX: `model` is rebound below to a (module, name) tuple.  In the
    # original code the string comparisons therefore never matched from the
    # second run onward, so the already-trained model of run 0 was silently
    # reused for every subsequent run.  Remember the architecture name so
    # each run builds a fresh model.
    model_name = model

    for i in range(times):
        print("---------------Running time:------------", i)
        # Generate a fresh model for this run; `model` becomes a
        # (torch module, architecture name) tuple, as the servers expect.
        if (model_name == "mclr"):
            if (dataset == "human_activity"):
                model = Mclr_Logistic(561, 6).to(device), model_name
            elif (dataset == "gleam"):
                model = Mclr_Logistic(561, 6).to(device), model_name
            elif (dataset == "vehicle_sensor"):
                model = Mclr_Logistic(100, 2).to(device), model_name
            elif (dataset == "Synthetic"):
                model = Mclr_Logistic(60, 10).to(device), model_name
            elif (dataset == "EMNIST"):
                model = Mclr_Logistic(784, 62).to(device), model_name
            else:  #(dataset == "Mnist"):
                model = Mclr_Logistic().to(device), model_name

        elif (model_name == "dnn"):
            if (dataset == "human_activity"):
                model = DNN(561, 100, 12).to(device), model_name
            elif (dataset == "gleam"):
                model = DNN(561, 20, 6).to(device), model_name
            elif (dataset == "vehicle_sensor"):
                model = DNN(100, 20, 2).to(device), model_name
            elif (dataset == "Synthetic"):
                model = DNN(60, 20, 10).to(device), model_name
            elif (dataset == "EMNIST"):
                model = DNN(784, 200, 62).to(device), model_name
            else:  #(dataset == "Mnist"):
                model = DNN2().to(device), model_name

        elif (model_name == "cnn"):
            if (dataset == "Cifar10"):
                model = CNNCifar(10).to(device), model_name
            else:
                # cnn is only wired up for Cifar10.
                return

        # select algorithm

        if (algorithm == "FedAvg"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" +
                                    model[1] + "_" + str(batch_size) + "_" +
                                    str(learning_rate) + "_" +
                                    str(num_glob_iters) + "_" +
                                    str(local_epochs) + "_" + str(numusers))
            server = FedAvg(experiment, device, data, algorithm, model,
                            batch_size, learning_rate, beta, L_k,
                            num_glob_iters, local_epochs, optimizer, numusers,
                            i, cutoff)

        elif (algorithm == "PerAvg"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" +
                                    model[1] + "_" + str(batch_size) + "_" +
                                    str(learning_rate) + "_" +
                                    str(personal_learning_rate) + "_" +
                                    str(learning_rate) + "_" +
                                    str(num_glob_iters) + "_" +
                                    str(local_epochs) + "_" + str(numusers))
            server = PerAvg(experiment, device, data, algorithm, model,
                            batch_size, learning_rate, beta, L_k,
                            num_glob_iters, local_epochs, optimizer, numusers,
                            i, cutoff)

        elif (algorithm == "FedU"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" +
                                    model[1] + "_" + str(batch_size) + "_" +
                                    str(learning_rate) + "_" + str(L_k) +
                                    "L_K" + "_" + str(num_glob_iters) + "_" +
                                    str(local_epochs) + "_" + str(numusers))
            server = FedU(experiment, device, data, algorithm, model,
                          batch_size, learning_rate, beta, L_k, num_glob_iters,
                          local_epochs, optimizer, numusers, K, i, cutoff)

        elif (algorithm == "pFedMe"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" +
                                    model[1] + "_" + str(batch_size) + "_" +
                                    str(learning_rate) + "_" +
                                    str(personal_learning_rate) + "_" +
                                    str(num_glob_iters) + "_" +
                                    str(local_epochs) + "_" + str(numusers))
            server = pFedMe(experiment, device, data, algorithm, model,
                            batch_size, learning_rate, beta, L_k,
                            num_glob_iters, local_epochs, optimizer, numusers,
                            K, personal_learning_rate, i, cutoff)

        elif (algorithm == "Local"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" +
                                    model[1] + "_" + str(batch_size) + "_" +
                                    str(learning_rate) + "_" + str(L_k) + "_" +
                                    str(num_glob_iters) + "_" +
                                    str(local_epochs) + "_" + str(numusers))
            server = FedLocal(experiment, device, data, algorithm, model,
                              batch_size, learning_rate, beta, L_k,
                              num_glob_iters, local_epochs, optimizer,
                              numusers, i, cutoff)

        elif (algorithm == "Global"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" +
                                    model[1] + "_" + str(batch_size) + "_" +
                                    str(learning_rate) + "_" + str(L_k) + "_" +
                                    str(num_glob_iters) + "_" +
                                    str(local_epochs) + "_" + str(numusers))
            server = FedGlobal(experiment, device, data, algorithm, model,
                               batch_size, learning_rate, beta, L_k,
                               num_glob_iters, local_epochs, optimizer,
                               numusers, i, cutoff)
        else:
            print("Algorithm is invalid")
            return

        server.train()
        server.test()

    # Aggregate the metrics written by the `times` runs above.
    average_data(num_users=numusers,
                 loc_ep1=local_epochs,
                 Numb_Glob_Iters=num_glob_iters,
                 lamb=L_k,
                 learning_rate=learning_rate,
                 beta=beta,
                 algorithms=algorithm,
                 batch_size=batch_size,
                 dataset=dataset,
                 k=K,
                 personal_learning_rate=personal_learning_rate,
                 times=times,
                 cutoff=cutoff)
Пример #26
0
def setup_clients(dataset, model_name=None, model=None, validation=False, seed=-1,
                  split_by_user=True, subsample_fraction=0.5):
    """Instantiates clients based on given train and test data directories.
        If validation is True, use part of training set as validation set.

    Data is read from ``../data/<dataset>/data/{train,test}`` via ``read_data``.

    Args:
        dataset: dataset name; also selects dataset-specific behaviour
            (user subsampling for 'femnist', validation split fraction for
            'sent140', preprocessing skip for some 'sent140' models).
        model_name: model identifier; for sent140 with 'erm_lstm_log_reg' /
            'rsm_lstm_log_reg' the raw data is passed through unpreprocessed.
        model: model object handed to every created Client.
        validation: if True, carve a validation split out of the training set.
        seed: seed for numpy's global RNG; -1 means "use the default seed 42".
        split_by_user: if True, train and test are disjoint *user* sets;
            otherwise each user's own samples are split into train/test.
        subsample_fraction: fraction of femnist users kept (split_by_user path).

    Return:
        all_clients: dict of Client lists — 'train_clients' plus
            'test_clients' (and 'validation_clients' when validation is True)
            in the split-by-user path; only 'train_clients' otherwise.
    """
    train_data_dir = os.path.join('..', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', dataset, 'data', 'test')

    clients, groups, train_data, test_data = read_data(train_data_dir, test_data_dir, split_by_user=split_by_user,
                                                       dataset=dataset)
    # Seed numpy's global RNG so the user subsampling below is reproducible.
    if seed != -1:
        np.random.seed(seed)
    else:
        np.random.seed(42)

    if split_by_user:
        train_users = clients['train_users']
        test_users = clients['test_users']

        if dataset == 'femnist':
            # Only subsample femnist
            indices_train = np.arange(len(train_users))
            indices_test = np.arange(len(test_users))
            fraction_used = int(subsample_fraction * len(train_users))
            # NOTE(review): the same count (derived from the *train* user
            # list) is reused for the test subsample; if there are fewer test
            # users than that, np.random.choice(replace=False) raises —
            # confirm this is intended.
            subindices_train = np.random.choice(indices_train, fraction_used, replace=False)
            subindices_test = np.random.choice(indices_test, fraction_used, replace=False)

            train_users = [train_users[i] for i in subindices_train]
            test_users = [test_users[i] for i in subindices_test]

        # No group metadata is used in this path; give every user an empty one.
        train_groups = [[] for _ in train_users]
        test_groups = [[] for _ in test_users]

        if validation:
            # Keep the leading 50% (sent140) / 80% (others) of train users for
            # training; the remaining users (and their data) become validation.
            fraction = 0.5 if (dataset == 'sent140') else 0.8
            split_point = int(fraction * len(train_users))
            validation_users = train_users[split_point:]
            validation_data = {}
            validation_groups = train_groups[split_point:]
            for u in validation_users:
                # Move the user's data out of train_data (copy, then delete).
                validation_data[u] = dict(train_data[u])
                del train_data[u]

            train_users = train_users[:split_point]
            train_groups = train_groups[:split_point]

    else: # split by sample
        users = clients['train_users']
        # subsample
        if dataset == 'femnist':
            # TODO : WHAT IS THIS ???
            # Deterministically keep 100 femnist users (fixed local seed 25)
            # and drop all other users' data.
            rng_sub = random.Random(25)
            users = rng_sub.sample(users, 100)
            train_data = {u: p for (u, p) in train_data.items() if u in users}
            test_data = {u: p for (u, p) in test_data.items() if u in users}
            # note: groups are empty
        elif dataset == 'shakespeare':
            pass
        else:
            # Only femnist/shakespeare are supported in the split-by-sample path.
            print('Not subsampling data')
            sys.exit(-1)

        if validation:  # split training set into train and val in the ratio 80:20
            print('Validation mode, splitting train data into train and val sets...')
            for idx, u in enumerate(users):
                # Shuffle each user's (x, y) pairs with a per-user seed, then
                # keep the first 80% for training; the 20% tail OVERWRITES the
                # user's entry in test_data.
                data = list(zip(train_data[u]['x'], train_data[u]['y']))
                rng = random.Random(idx)
                rng.shuffle(data)
                split_point = int(0.8 * len(data))
                x, y = zip(*data[:split_point])
                x1, y1 = zip(*data[split_point:])
                train_data[u] = {'x': list(x), 'y': list(y)}
                test_data[u] = {'x': list(x1), 'y': list(y1)}
        if len(groups) == 0:
            groups = [[] for _ in users]
    if split_by_user:

        print('------>', len(train_users))

        train_clients = []
        test_clients = []

        # When True the data IS run through preprocess_data_x/_y; only the
        # sent140 lstm+log-reg models take the raw, unpreprocessed data.
        do_preprocess = not (
                dataset == 'sent140' and model_name in ['erm_lstm_log_reg', 'rsm_lstm_log_reg']
        )
        print('do_preprocess = ', do_preprocess)

        # Wrap every training user into a Client (train data only).
        for u, g in zip(train_users, train_groups):
            train_data_u_x = (
                preprocess_data_x(train_data[u]['x'],
                                  dataset=dataset,
                                  model_name=model_name)
                if do_preprocess else train_data[u]['x']
            )
            train_data_u_y = (
                preprocess_data_y(train_data[u]['y'],
                                  dataset=dataset,
                                  model_name=model_name)
                if do_preprocess else train_data[u]['y']
            )
            train_data_u = {'x': train_data_u_x, 'y': train_data_u_y}
            train_clients.append(Client(u, g, train_data=train_data_u, model=model, dataset=dataset))

        # Wrap every test user into a Client (eval data only).
        for u, g in zip(test_users, test_groups):
            test_data_u_x = (
                preprocess_data_x(test_data[u]['x'],
                                  dataset=dataset,
                                  model_name=model_name)
                if do_preprocess else test_data[u]['x']
            )
            test_data_u_y = (
                preprocess_data_y(test_data[u]['y'],
                                  dataset=dataset,
                                  model_name=model_name)
                if do_preprocess else test_data[u]['y']
            )
            test_data_u = {'x': test_data_u_x, 'y': test_data_u_y}
            test_clients.append(Client(u, g,  eval_data=test_data_u, model=model, dataset=dataset))

        all_clients = {
            'train_clients': train_clients,
            'test_clients': test_clients
        }

        if validation:
            # Validation users are wrapped like training clients (their data
            # is passed as train_data).
            validation_clients = []
            for u, g in zip(validation_users, validation_groups):
                validation_data_u_x = (
                    preprocess_data_x(validation_data[u]['x'],
                                      dataset=dataset,
                                      model_name=model_name)
                    if do_preprocess else validation_data[u]['x']
                )
                validation_data_u_y = (
                    preprocess_data_y(validation_data[u]['y'],
                                      dataset=dataset,
                                      model_name=model_name)
                    if do_preprocess else validation_data[u]['y']
                )
                validation_data_u = {'x': validation_data_u_x, 'y': validation_data_u_y}
                validation_clients.append(Client(u, g, train_data=validation_data_u, model=model, dataset=dataset))

            all_clients['validation_clients'] = validation_clients

        return all_clients

    else:
        # every client is used for training when split_by_user is False
        train_clients = []
        for u, g in zip(users, groups):
            # TODO: skip preprocess if necessary
            train_data_u_x = preprocess_data_x(train_data[u]['x'], dataset=dataset)
            train_data_u_y = preprocess_data_y(train_data[u]['y'], dataset=dataset)
            test_data_u_x = preprocess_data_x(test_data[u]['x'], dataset=dataset)
            test_data_u_y = preprocess_data_y(test_data[u]['y'], dataset=dataset)

            train_data_u = {'x': train_data_u_x, 'y': train_data_u_y}
            test_data_u = {'x': test_data_u_x, 'y': test_data_u_y}

            train_clients.append(Client(u, g, train_data_u, test_data_u, model, dataset=dataset))

        all_clients = {
            'train_clients':train_clients
        }

        return all_clients