Example #1
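Client-side constructor for synchronous federated averaging: it seeds every RNG source, opens a per-rank log file under logs/fed/, moves the model to the local GPU (or CPU), builds a momentum-free SGD optimizer, and loads this rank's non-IID shard of the training set.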
    def __init__(self, gpu, rank, world_size, dataset, model, batch_size, lr,
                 client_epoch, seed, exp_id):
        super().__init__()

        # Seed every RNG source for reproducibility.
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
        # Deterministic cuDNN kernels trade speed for repeatability.
        torch.backends.cudnn.deterministic = True

        self.iid = False

        logger.add(f"logs/fed/{world_size}_{dataset}_{batch_size}_{lr}"
                   f"_{exp_id}_{rank}.log")
        self.rank = rank
        self.lr = lr
        self.client_epoch = client_epoch
        self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
        self.model = load_model(
            model,
            class_num=100 if dataset == "cifar100" else 10).to(self.device)
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.lr,
                                         momentum=0.)
        self.loss_func = nn.CrossEntropyLoss()
        self.train_loader, _ = partition_dataset(dataset, world_size - 1,
                                                 rank - 1, batch_size, seed,
                                                 self.iid)
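A minimal usage sketch for this constructor. The enclosing class name is not shown above, so `Worker`, the model name, and all argument values are illustrative assumptions:

# Hypothetical launch of one client; "Worker" is an assumed class name
# and the dataset/model strings are illustrative values.
worker = Worker(gpu=0, rank=1, world_size=5, dataset="cifar100",
                model="resnet18", batch_size=32, lr=0.01,
                client_epoch=5, seed=42, exp_id=0)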
Example #2
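Server-side counterpart for the vanilla (synchronous) run: besides seeding and logging, it optionally starts Weights & Biases tracking, allocates one DataAggregation slot per client rank, and keeps only the test loader for global evaluation.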
    def __init__(self, gpu, world_size, dataset, batch_size, lr, model,
                 max_epoch, client_epoch, seed, exp_id):
        super().__init__()

        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
        torch.backends.cudnn.deterministic = True

        self.iid = False

        logger.add(f"logs/fed/{world_size}_{dataset}_{batch_size}_{lr}"
                   f"_{model}_{max_epoch}_{client_epoch}_PS{exp_id}.log")
        if wandb_enable:
            wandb.init(
                project="Async_FedAvg",
                name=f"vanilla_{world_size}_{batch_size}_{max_epoch}_{client_epoch}"
                     f"_{lr}_{dataset}_{model}_{'iid' if self.iid else 'noniid'}",
                config={
                    "method": "vanilla",
                    "world size": world_size,
                    "dataset": dataset,
                    "iid": self.iid,
                    "model": model,
                    "batch size": batch_size,
                    "learning rate": lr,
                    "momentum": 0.,
                    "lambda": 0.,
                    "global epoch": max_epoch,
                    "client epoch": client_epoch,
                    "seed": seed
                })

        self.max_epoch = max_epoch
        self.client_epoch = client_epoch
        self.curr_epoch = -1
        self.lr = lr
        self.world_size = world_size
        self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
        self.model = load_model(
            model,
            class_num=100 if dataset == "cifar100" else 10).to(self.device)
        self.aggregation = [DataAggregation(r) for r in range(1, world_size)]
        self.embedding_list = []

        self.client_counter = 0
        self.fetch_time = 0.
        self.sync_time = 0.

        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.lr,
                                         momentum=0.)
        _, self.test_loader = partition_dataset(dataset, world_size - 1, 0,
                                                batch_size, seed, self.iid)
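The server passes partition id 0 to partition_dataset only to obtain the shared test loader, while each client r (Example #1) trains on shard r - 1 of the world_size - 1 partitions. A sketch of that rank-to-shard mapping, assuming partition_dataset(dataset, n_partitions, partition_id, batch_size, seed, iid) returns (train_loader, test_loader) and comes from the surrounding project; in the real system each rank is a separate process, so the loop below is only to show the mapping:

dataset, batch_size, seed = "cifar10", 32, 42   # illustrative values
world_size = 5                                  # 1 server + 4 clients
for rank in range(world_size):
    if rank == 0:
        # Server: keeps only the test loader; partition id 0 is a placeholder.
        _, test_loader = partition_dataset(dataset, world_size - 1, 0,
                                           batch_size, seed, False)
    else:
        # Client: trains on its own shard of the world_size - 1 partitions.
        train_loader, _ = partition_dataset(dataset, world_size - 1, rank - 1,
                                            batch_size, seed, False)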
Example #3
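Client-side constructor for the asynchronous variant: compared with Example #1 it also supports EMNIST (62 classes), switches to NLLLoss for the transformer model, keeps both train and test loaders, and prepares prefetch state plus an ExtraWorkLoader for extra local work.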
    def __init__(self, gpu, rank, world_size, dataset, model, batch_size, lr, seed, exp_id):
        super().__init__()

        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
        torch.backends.cudnn.deterministic = True

        self.iid = False

        logger.add(f"logs/asyncfed/{world_size}_{dataset}_{batch_size}_{lr}"
                   f"_{exp_id}_{rank}.log")
        self.rank = rank
        self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
        self.lr = lr
        if dataset == "cifar100":
            class_num = 100
        elif dataset == "emnist":
            class_num = 62
        else:
            class_num = 10
        self.model_name = model
        self.model = load_model(model, class_num).to(self.device)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.)
        if model == "transformer":
            self.loss_func = nn.NLLLoss()
        else:
            self.loss_func = nn.CrossEntropyLoss()
        self.sync_future_all = None
        self.train_loader, self.test_loader = partition_dataset(dataset, world_size - 1, rank - 1,
                                                                batch_size, seed, iid=self.iid)

        self.latest_model = {}
        self.prefetch_model = {}

        self.extra_work = 0.
        self.extra_work_iter = ExtraWorkLoader(self.train_loader)
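ExtraWorkLoader is not defined in this snippet. A plausible minimal sketch, assuming it only cycles the wrapped DataLoader so a fast client can keep pulling batches past the end of an epoch; the class body and the method name `retrieve` are guesses, not the project's actual code:

class ExtraWorkLoader:
    # Hypothetical re-implementation: endlessly re-iterates a DataLoader.
    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = iter(dataloader)

    def retrieve(self):
        try:
            data, target = next(self.iterator)
        except StopIteration:
            # Epoch exhausted: restart from the beginning.
            self.iterator = iter(self.dataloader)
            data, target = next(self.iterator)
        return data, target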
Example #4
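Asynchronous parameter-server constructor: on top of the vanilla server setup it adds server-side momentum (mom) and a lambda coefficient (lambd), per-client dynamic task and timing arrays, buffers for previous-round state (wtminus1, mom_buffer, gminus1), and early-stopping parameters.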
    def __init__(self, gpu, world_size, dataset, batch_size, lr,
                 mom, lambd, model, max_epoch, client_epoch, seed, exp_id,
                 early_stop_round, early_stop_metric):
        super().__init__()

        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
        torch.backends.cudnn.deterministic = True

        self.iid = False

        logger.add(f"logs/asyncfed/{world_size}_{dataset}_{batch_size}_{lr}"
                   f"_{mom}_{lambd}_{model}_{max_epoch}_{client_epoch}_PS{exp_id}.log")
        if wandb_enable:
            wandb.init(project="Async_FedAvg",
                       name=f"async_{world_size}_{batch_size}_{max_epoch}_{client_epoch}_{lr}_{lambd}"
                            f"_{mom}_{dataset}_{model}_{'iid' if self.iid else 'noniid'}",
                       config={
                           "method": "async",
                           "world size": world_size,
                           "dataset": dataset,
                           "iid": self.iid,
                           "model": model,
                           "batch size": batch_size,
                           "learning rate": lr,
                           "momentum": mom,
                           "lambda": lambd,
                           "global epoch": max_epoch,
                           "client epoch": client_epoch,
                           "seed": seed,
                           "mom_method": "normal",
                       })

        self.max_epoch = max_epoch * client_epoch
        self.client_epoch = client_epoch
        self.world_size = world_size
        self.mom = mom
        self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
        self.model = load_model(model, class_num=100 if dataset == "cifar100" else 10).to(self.device)
        self.lr = lr
        self.lambd = lambd
        self.aggregation = [DataAggregation(r) for r in range(1, world_size)]
        self.embedding_list = []

        # Per-client dynamic task and timing state, one slot per client rank.
        self.dyn_task = np.zeros(self.world_size - 1)
        self.dyn_timer = np.zeros(self.world_size - 1)

        self.client_counter = 0
        self.wtminus1 = {}    # w_{t-1}: previous-round model state
        self.mom_buffer = {}  # server-side momentum buffer
        self.gminus1 = {}     # g_{t-1}: previous-round update
        self.broadcast_fut_all = None
        self.cluster_is_ready = True

        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.)
        _, self.test_loader = partition_dataset(dataset, world_size - 1, 0, batch_size, seed, iid=self.iid)

        self.acc_list = []
        self.early_stop_round = early_stop_round
        self.early_stop_metric = early_stop_metric
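The constructor only stores acc_list, early_stop_round and early_stop_metric; the stopping rule itself is not shown. A hedged sketch of one plausible rule, stopping once the target metric is reached or accuracy has not improved for early_stop_round evaluations; the method name and its exact semantics are assumptions:

    def should_early_stop(self):
        # Hypothetical helper; not part of the snippet above.
        if self.acc_list and self.acc_list[-1] >= self.early_stop_metric:
            return True   # target accuracy reached
        if len(self.acc_list) <= self.early_stop_round:
            return False  # not enough evaluations yet
        best_before = max(self.acc_list[:-self.early_stop_round])
        recent_best = max(self.acc_list[-self.early_stop_round:])
        return recent_best <= best_before  # no recent improvement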