def aggregate(conf, master_model, fedavg_model, client_models,
              flatten_local_models):
    # perform the server momentum (either heavy-ball or Nesterov momentum).
    fl_aggregate = conf.fl_aggregate

    assert "server_momentum_factor" in fl_aggregate

    # start the server momentum acceleration.
    current_model_tb = TensorBuffer(list(fedavg_model.parameters()))
    previous_model_tb = TensorBuffer(list(master_model.parameters()))

    # get the update direction.
    update = previous_model_tb.buffer - current_model_tb.buffer

    # using server momentum for the update.
    if not hasattr(conf, "server_momentum_buffer"):
        conf.server_momentum_buffer = torch.zeros_like(update)
    conf.server_momentum_buffer.mul_(
        fl_aggregate["server_momentum_factor"]).add_(update)
    previous_model_tb.buffer.add_(-conf.server_momentum_buffer)

    # update the master_model (but will use the bn stats from the fedavg_model)
    master_model = fedavg_model
    _model_param = list(master_model.parameters())
    previous_model_tb.unpack(_model_param)

    # free the memory.
    torch.cuda.empty_cache()

    # a temporary hack (for debugging only).
    client_models = dict((used_client_arch, master_model.cpu())
                         for used_client_arch in conf.used_client_archs)
    return client_models
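All of these examples revolve around a `TensorBuffer` helper that flattens a list of tensors into one contiguous buffer and can unpack it again. The snippet below is only a minimal sketch of the interface the examples appear to rely on (packing, iteration, indexing, `unpack`); the repository's actual implementation may differ.

import torch

class TensorBuffer:
    """Minimal sketch: flatten a list of tensors into a single 1-D buffer."""

    def __init__(self, tensors):
        self._shapes = [t.shape for t in tensors]
        self._numels = [t.numel() for t in tensors]
        # one contiguous buffer holding all tensors back to back.
        self.buffer = torch.cat([t.detach().reshape(-1) for t in tensors])

    def __iter__(self):
        # yield views into the buffer, one per original tensor.
        offset = 0
        for shape, numel in zip(self._shapes, self._numels):
            yield self.buffer[offset:offset + numel].view(shape)
            offset += numel

    def __getitem__(self, index):
        offset = sum(self._numels[:index])
        return self.buffer[offset:offset + self._numels[index]].view(self._shapes[index])

    def unpack(self, tensors):
        # copy the (possibly updated) buffer back into the given tensors.
        for synced, tensor in zip(self, tensors):
            tensor.data.copy_(synced)

Under this sketch, `TensorBuffer(list(model.parameters()))` packs a model into `.buffer`, arithmetic happens on the flat buffer, and `unpack` copies the result back into the parameters.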
Example #2
    def _receive_models_from_selected_clients(self, selected_client_ids):
        self.conf.logger.log(f"Master waits to receive the local models.")
        dist.barrier()

        # init the placeholders to recv the local models from workers.
        flatten_local_models = dict()
        for selected_client_id in selected_client_ids:
            arch = self.clientid2arch[selected_client_id]
            client_tb = TensorBuffer(
                list(self.client_models[arch].state_dict().values()))
            client_tb.buffer = torch.zeros_like(client_tb.buffer)
            flatten_local_models[selected_client_id] = client_tb

        # async to receive model from clients.
        reqs = []
        for client_id, world_id in zip(selected_client_ids, self.world_ids):
            req = dist.irecv(tensor=flatten_local_models[client_id].buffer,
                             src=world_id)
            reqs.append(req)

        for req in reqs:
            req.wait()

        dist.barrier()
        self.conf.logger.log(f"Master received all local models.")
        return flatten_local_models
Example #3
    def step(self, closure=None, **kargs):
        # Apply the gradients with the weight decay and momentum.
        with kargs["timer"]("grad.apply_grad", epoch=self.conf.epoch_):
            utils.apply_gradient(
                self.param_groups, self.state, apply_grad_to_model=True
            )

        with kargs["timer"]("grad.get_params", epoch=self.conf.epoch_):
            params, _ = comm.get_data(
                self.param_groups, self.param_names, is_get_grad=False
            )
            params_tb = TensorBuffer(params)

        with kargs["timer"]("grad.error_compensate", epoch=self.conf.epoch_):
            self.memory.buffer += params_tb.buffer

        with kargs["timer"]("grad.compress", epoch=self.conf.epoch_):
            sync_buffer = {"original_shapes": self.shapes, "params_tb": self.memory}
            local_compressed_params_tb = self.compressor.compress(sync_buffer)

        with kargs["timer"]("grad.update_memory", epoch=self.conf.epoch_):
            self.memory.buffer = self.memory.buffer - local_compressed_params_tb.buffer

        with kargs["timer"]("grad.sync", epoch=self.conf.epoch_):
            self.compressor.sync(sync_buffer)

        # update local model.
        with kargs["timer"]("grad.decompress", epoch=self.conf.epoch_):
            aggregated_info_tb = self.compressor.uncompress(
                sync_buffer, self.neighbors_info
            )
            params_tb.buffer += aggregated_info_tb.buffer
            params_tb.unpack(params)
        return sync_buffer["n_bits"]
Example #4
    def step(self, closure=None, **kargs):
        with kargs['timer']('sync', epoch=self.conf.epoch_):
            # do the local update steps.
            with kargs["timer"]("local_update", epoch=self.conf.epoch_):
                utils.apply_gradient(self.param_groups,
                                     self.state,
                                     apply_grad_to_model=True)

            # enter the global sync if it satisfies the condition.
            if (self.conf.epoch_ < self.turn_on_local_step_from_epoch
                    or self.conf.local_index % self.local_step == 0):
                with kargs["timer"]("get_params", epoch=self.conf.epoch_):
                    # get params.
                    params, _ = comm.get_data(self.param_groups,
                                              self.param_names,
                                              is_get_grad=False)
                    params_tb = TensorBuffer(params)
                with kargs['timer']('memory_and_compress',
                                    epoch=self.conf.epoch_):
                    # get the params difference w.r.t. previous synced model.
                    local_scale, local_sign = [], []
                    for consensus_param, param, memory in zip(
                            self.consensus_params_tb, params_tb,
                            self.memory_tb):
                        memory.data.copy_(consensus_param - param + memory)
                        # compress.
                with kargs["timer"]("directions", epoch=self.conf.epoch_):
                    direction = exchange(self.memory_tb.buffer)  #signum
                with kargs['timer']('memory_and_compress',
                                    epoch=self.conf.epoch_):
                    for consensus_param, param, memory in zip(
                            self.consensus_params_tb, params_tb,
                            self.memory_tb):
                        _local_scale, _local_sign = scaled_sign(memory)
                        local_scale.append(_local_scale)
                        local_sign.append(_local_sign)
                        memory.data.copy_(memory - _local_scale * _local_sign)
                with kargs["timer"]("directions", epoch=self.conf.epoch_):
                    global_direction = TB(self.memory_tb, direction)
                with kargs["timer"]("magnitudes", epoch=self.conf.epoch_):
                    magnitudes_tb = TensorBuffer(local_scale)
                    magnitudes_tb.buffer = self.world_aggregator._agg(
                        magnitudes_tb.buffer,
                        "avg",
                        distributed=self.conf.distributed)
                # unpack the synced info and update the consensus params.
                with kargs["timer"]("update_consensus",
                                    epoch=self.conf.epoch_):
                    for update_magnitude, update_direction, consensus_param in zip(
                            magnitudes_tb, global_direction,
                            self.consensus_params_tb):
                        consensus_param.add_(
                            update_direction.mul(update_magnitude), alpha=-1.0)

                # make the local models consistent by assigning the consensus params.
                self.consensus_params_tb.unpack(params)
                n_bits = get_n_bits(magnitudes_tb.buffer)
            else:
                n_bits = 0
            return n_bits
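This example and the benchmarks further down call a `scaled_sign` helper that is not shown in the listing. A plausible minimal definition, assuming the usual 1-norm scaling used by sign-compression schemes (and consistent with how the results are used above), is:

import torch

def scaled_sign(tensor):
    # scale * sign(tensor) approximates `tensor`; the scale is the mean absolute value.
    scale = tensor.norm(p=1) / tensor.nelement()
    return scale, tensor.sign()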
Example #5
    def compress(self, sync_buffer):
        # get the values/indices for the tensor (to be transmitted).
        selected_values, selected_indices = [], []

        for half_param, hat_param in zip(sync_buffer["flatten_half_params"],
                                         sync_buffer["flatten_params"]):
            _selected_values, _selected_indices = self.compressor_fn.compress(
                half_param - hat_param,
                self.comm_op,
                self.compress_ratio,
                self.is_biased,
            )
            selected_values.append(_selected_values)
            selected_indices.append(_selected_indices)

        # get selected shapes.
        selected_shapes = [len(_value) for _value in selected_values]

        # flatten selected values/indices.
        flatten_selected_values = TensorBuffer(selected_values)
        flatten_selected_indices = TensorBuffer(selected_indices)

        # get n_bits to transmit.
        n_bits = get_n_bits(flatten_selected_values.buffer) + get_n_bits(
            flatten_selected_indices.buffer)

        # update shared dict.
        sync_buffer["selected_shapes"] = selected_shapes
        sync_buffer["flatten_selected_values"] = flatten_selected_values
        sync_buffer["flatten_selected_indices"] = flatten_selected_indices
        sync_buffer["n_bits"] = n_bits
Example #6
    def step(self, closure=None, **kargs):
        # Apply the gradients with the weight decay and momentum.
        with kargs["timer"]("grad.apply_grad", epoch=self.conf.epoch_):
            utils.apply_gradient(
                self.param_groups, self.state, apply_grad_to_model=False
            )

        # get flattened params.
        with kargs["timer"]("grad.get_params", epoch=self.conf.epoch_):
            params, _ = comm.get_data(
                self.param_groups, self.param_names, is_get_grad=False
            )
            flatten_params = TensorBuffer(params)

            grads, _ = comm.get_data(
                self.param_groups, self.param_names, is_get_grad=True
            )
            flatten_grads = TensorBuffer(grads)

        with kargs["timer"]("grad.get_extrapolated_model", epoch=self.conf.epoch_):
            flatten_updated_params = deepcopy(flatten_params)

            # get weighted hat params.
            flatten_updated_params.buffer = sum(
                [
                    _hat_params.buffer * self.neighbors_info[_rank]
                    for _rank, _hat_params in self.neighbor_hat_params.items()
                ]
            )

        # get updated local model (flatten params).
        with kargs["timer"]("grad.unflatten_to_update", epoch=self.conf.epoch_):
            flatten_updated_params.buffer.add_(
                flatten_grads.buffer, alpha=-self.param_groups[0]["lr"]
            )
            flatten_updated_params.unpack(params)

            # get extrapolated model.
            flatten_updated_params.buffer = (
                (1 - 0.5 * self.conf.local_index) * flatten_params.buffer
                + 0.5 * self.conf.local_index * flatten_updated_params.buffer
            )

        # compress the model difference and sync.
        with kargs["timer"]("grad.compress", epoch=self.conf.epoch_):
            sync_buffer = {
                "original_shapes": self.shapes,
                "flatten_updated_params": flatten_updated_params,
            }
            self.compressor.compress(sync_buffer)

        with kargs["timer"]("grad.sync", epoch=self.conf.epoch_):
            self.compressor.sync(sync_buffer)

        with kargs["timer"]("grad.unflatten_to_update", epoch=self.conf.epoch_):
            self.compressor.uncompress(
                sync_buffer, self.neighbor_hat_params, self.conf.local_index
            )
        return sync_buffer["n_bits"]
Example #7
    def step(self, closure=None, **kargs):
        if self.conf.is_centralized:
            with kargs["timer"]("sync/get_data", epoch=self.conf.epoch_):
                # Get data.
                grads, _ = comm.get_data(self.param_groups,
                                         self.param_names,
                                         is_get_grad=True)
                flatten_grads = TensorBuffer(grads)

            with kargs["timer"]("sync/sync", epoch=self.conf.epoch_):
                # Aggregate the gradients.
                flatten_grads.buffer = self.world_aggregator._agg(
                    flatten_grads.buffer,
                    op="avg",
                    distributed=self.conf.distributed)

            with kargs["timer"]("sync/unflatten_grad", epoch=self.conf.epoch_):
                # unflatten grads.
                flatten_grads.unpack(grads)

            with kargs["timer"]("sync/apply_grad", epoch=self.conf.epoch_):
                utils.apply_gradient(self.param_groups,
                                     self.state,
                                     apply_grad_to_model=True)

            # Get n_bits to transmit.
            n_bits = get_n_bits(flatten_grads.buffer)
        else:
            with kargs["timer"]("sync/apply_grad", epoch=self.conf.epoch_):
                utils.apply_gradient(self.param_groups,
                                     self.state,
                                     apply_grad_to_model=True)

            with kargs["timer"]("sync/get_data", epoch=self.conf.epoch_):
                # first get and flatten all params.
                params, _ = comm.get_data(self.param_groups,
                                          self.param_names,
                                          is_get_grad=False)
                flatten_params = TensorBuffer(params)

            with kargs["timer"]("sync/sync", epoch=self.conf.epoch_):
                # prepare the sync.
                if self.conf.comm_device == "cpu":
                    flatten_params.buffer.cpu().detach_()

                # then sync.
                flatten_params.buffer = self.decentralized_aggregator._agg(
                    flatten_params.buffer, op="weighted")

            with kargs["timer"]("sync/update_model", epoch=self.conf.epoch_):
                # finally unflatten.
                flatten_params.unpack(params)

            # Get n_bits to transmit.
            n_bits = get_n_bits(flatten_params.buffer)
        return n_bits
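Every `step` shown in these examples expects a `timer` keyword argument that is called as `timer(label, epoch=...)` and used as a context manager. A no-op stand-in with that calling convention, purely for illustration, is:

from contextlib import contextmanager

@contextmanager
def noop_timer(label, epoch=None):
    # a real timer would record the elapsed time under `label` here.
    yield

# hypothetical call site:
# n_bits = optimizer.step(closure=None, timer=noop_timer)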
Example #8
    def init_neighbor_hat_params(self):
        params, self.shapes = comm.get_data(self.param_groups,
                                            self.param_names,
                                            is_get_grad=False)
        flatten_params = TensorBuffer(params)
        flatten_params.buffer = torch.zeros_like(flatten_params.buffer)

        # init the neighbor_params.
        self.neighbor_hat_params = {
            self.rank: deepcopy(flatten_params),
            "memory": deepcopy(flatten_params),
        }
Example #9
        def _init_neighbor_hat_params(conf, param_groups, param_names):
            params, params_shapes = comm.get_data(param_groups,
                                                  param_names,
                                                  is_get_grad=False)
            flatten_params = TensorBuffer(params)
            flatten_params.buffer = torch.zeros_like(flatten_params.buffer)

            # init the neighbor_params.
            return (
                {
                    conf.graph.rank: deepcopy(flatten_params),
                    "memory": deepcopy(flatten_params),
                },
                params_shapes,
            )
Example #10
    def compress(self, grads_tb):
        # get the sign/magnitude for the tensor (to be transmitted).
        sync_buffer = dict()

        # flatten selected values/indices.
        grad_norms_tb = TensorBuffer([grad.norm(p=1) for grad in grads_tb])
        signs, sign_size = self.compressor_fn.compress(grads_tb.buffer)

        # get compressed grad.
        synced_grads_tb = copy.deepcopy(grads_tb)
        for synced_grad, grad_norm, grad in zip(synced_grads_tb, grad_norms_tb,
                                                grads_tb):
            synced_grad.data.copy_(grad_norm * torch.sign(grad) /
                                   grad.nelement())

        # get n_bits to transmit.
        n_bits = get_n_bits(grad_norms_tb.buffer) + get_n_bits(signs)

        # update shared dict.
        sync_buffer["grad_norms_tb"] = grad_norms_tb
        sync_buffer["grads_tb"] = grads_tb
        sync_buffer["synced_grads_tb"] = synced_grads_tb
        sync_buffer["signs"] = signs
        sync_buffer["sign_size"] = sign_size
        sync_buffer["n_bits"] = n_bits
        return sync_buffer
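The `compressor_fn.compress` / `compressor_fn.uncompress` pair used here (and in the decompress example below) packs element signs into bytes and later recovers a ±1 tensor of the original length. A self-contained sketch of one such packer, assuming this bit-packing behaviour and illustrative names, is:

import torch

def pack_signs(tensor):
    # map positive entries to bit 1, everything else to bit 0, then pack 8 bits per byte.
    bits = (tensor.reshape(-1) > 0).to(torch.uint8)
    size = bits.numel()
    bits = torch.cat([bits, bits.new_zeros((-size) % 8)])
    weights = bits.new_tensor([1, 2, 4, 8, 16, 32, 64, 128])
    packed = (bits.view(-1, 8) * weights).sum(dim=1, dtype=torch.uint8)
    return packed, size

def unpack_signs(packed, size):
    # recover a flat +1/-1 tensor of the original length.
    weights = packed.new_tensor([1, 2, 4, 8, 16, 32, 64, 128])
    bits = (packed.unsqueeze(1) & weights).ne(0).view(-1)[:size]
    return bits.float() * 2 - 1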
Example #11
    def compress(self, sync_buffer):
        # flatten selected values/indices.
        param_norms_tb = TensorBuffer(
            [param.norm(p=1) for param in sync_buffer["params_tb"]]
        )
        signs, sign_size = self.compressor_fn.compress(sync_buffer["params_tb"].buffer)

        # get compressed model.
        local_compressed_params_tb = deepcopy(sync_buffer["params_tb"])
        for local_compressed_param, param_norm, param in zip(
            local_compressed_params_tb, param_norms_tb, sync_buffer["params_tb"]
        ):
            local_compressed_param.data.copy_(
                param_norm * torch.sign(param) / param.nelement()
            )

        # get n_bits to transmit.
        n_bits = get_n_bits(param_norms_tb.buffer) + get_n_bits(signs)

        # update shared dict.
        sync_buffer["param_norms_tb"] = param_norms_tb
        sync_buffer["signs"] = signs
        sync_buffer["sign_size"] = sign_size
        sync_buffer["n_bits"] = n_bits
        return local_compressed_params_tb
Example #12
    def compress(self, sync_buffer):
        # quantize the tensor (to be transmitted).
        quantized_values = []

        # compress and get compressed model.
        local_compressed_params_tb = deepcopy(sync_buffer["params_tb"])
        local_compressed_params_tb.buffer = torch.zeros_like(
            local_compressed_params_tb.buffer
        )
        for param, local_compressed_param in zip(
            sync_buffer["params_tb"], local_compressed_params_tb
        ):
            # quantize.
            _quantized_values = self.compressor_fn.compress(
                param, self.comm_op, self.quantize_level, self.is_biased
            )
            quantized_values.append(_quantized_values)

            # update the local compressed params.
            local_compressed_param.data.copy_(_quantized_values)

        # flatten selected values/indices.
        flatten_updates = TensorBuffer(quantized_values)

        # get n_bits to transmit.
        n_bits = get_n_bits(flatten_updates.buffer) * self.quantize_level / 32

        # update shared dict.
        sync_buffer["flatten_updates"] = flatten_updates
        sync_buffer["n_bits"] = n_bits
        return local_compressed_params_tb
Example #13
    def decompress(self, sync_buffer):
        # decompress and update.
        for rank in range(self.world_size):
            if rank == self.rank:
                continue

            # get grad_norm and build its tensorbuffer.
            _grad_norms = comm.recover_device(
                sync_buffer["synced_grad_norms"][rank],
                device=sync_buffer["synced_grads_tb"].buffer.device,
            )
            grad_norms_tb = TensorBuffer(_grad_norms)

            # get signs and build its tensorbuffer.
            signs = comm.recover_device(
                sync_buffer["synced_signs"][rank],
                device=sync_buffer["synced_grads_tb"].buffer.device,
            )
            _signs = self.compressor_fn.uncompress(signs,
                                                   sync_buffer["sign_size"])
            signs_tb = copy.deepcopy(sync_buffer["synced_grads_tb"])
            signs_tb.buffer = _signs

            # update grads.
            for grad_norm, sign, synced_grad in zip(
                    grad_norms_tb, signs_tb, sync_buffer["synced_grads_tb"]):
                _update = grad_norm * sign / synced_grad.nelement()
                synced_grad.add_(_update)

        # average grad.
        sync_buffer["synced_grads_tb"].buffer /= self.world_size * 1.0
        return sync_buffer["synced_grads_tb"]
Example #14
    def uncompress(self, sync_buffer, neighbors_info):
        aggregated_info_tb = deepcopy(sync_buffer["params_tb"])
        aggregated_info_tb.buffer = torch.zeros_like(aggregated_info_tb.buffer)

        # uncompress and update.
        for rank in neighbors_info.keys():
            param_norms = sync_buffer["synced_param_norms"][rank]
            signs = sync_buffer["synced_signs"][rank]

            # recover the message and the corresponding device.
            param_norms = comm.recover_device(
                param_norms, device=sync_buffer["params_tb"].buffer.device
            )
            signs = self.compressor_fn.uncompress(
                comm.recover_device(
                    signs, device=sync_buffer["params_tb"].buffer.device
                ),
                sync_buffer["sign_size"],
            )

            # build the corresponding tensorbuffer.
            param_norms_tb = TensorBuffer(param_norms)
            signs_tb = deepcopy(sync_buffer["params_tb"])
            signs_tb.buffer = signs

            # accumulate information from the neighborhood.
            for _info, _param_norm, _sign in zip(
                aggregated_info_tb, param_norms_tb, signs_tb
            ):
                _info.add_(
                    self.consensus_stepsize
                    * (neighbors_info[rank] - (1 if rank == self.rank else 0))
                    * (_param_norm / _sign.nelement() * _sign)
                )
        return aggregated_info_tb
Example #15
    def step(self, closure=None, **kargs):
        with kargs["timer"]("sync.local_update", epoch=self.conf.epoch_):
            utils.apply_gradient(self.param_groups,
                                 self.state,
                                 apply_grad_to_model=True)

        with kargs["timer"]("sync.sync_and_update", epoch=self.conf.epoch_):
            # enter the global sync if it satisfies the condition.
            if (self.conf.epoch_ < self.turn_on_local_step_from_epoch
                    or self.conf.local_index % self.local_step == 0):
                # get params.
                params, _ = comm.get_data(self.param_groups,
                                          self.param_names,
                                          is_get_grad=False)
                params_tb = TensorBuffer(params)

                # get params_diff.
                param_diff = self.consensus_params_tb.buffer - params_tb.buffer
                # sync the directions.
                param_diff = self.world_aggregator._agg(
                    param_diff, "avg", distributed=self.conf.distributed)

                # unpack the synced info and update the consensus params.
                self.consensus_params_tb.buffer.add_(param_diff, alpha=-1.0)

                # make the local models consistent by assigning the consensus params.
                self.consensus_params_tb.unpack(params)

                # Get n_bits to transmit.
                n_bits = get_n_bits(param_diff)
            else:
                n_bits = 0
        return n_bits
Example #16
    def _listen_to_master(self):
        # listen to master, related to the function `_activate_selected_clients` in `master.py`.
        msg = torch.zeros((3, self.conf.n_participated))
        dist.broadcast(tensor=msg, src=0)
        self.conf.graph.client_id, self.conf.graph.comm_round, self.n_local_epochs = (
            msg[:, self.conf.graph.rank - 1].to(int).cpu().numpy().tolist())

        # once we receive the signal, we init for the local training.
        self.arch, self.model = create_model.define_model(
            self.conf,
            to_consistent_model=False,
            client_id=self.conf.graph.client_id)
        self.model_state_dict = self.model.state_dict()
        self.model_tb = TensorBuffer(list(self.model_state_dict.values()))
        self.metrics = create_metrics.Metrics(self.model,
                                              task="classification")
        dist.barrier()
Example #17
def recover_params(param_groups,
                   param_names,
                   rank=None,
                   neighbor_hat_params=None,
                   get_hat_params=True):
    # get flattened params.
    params, _ = comm.get_data(param_groups, param_names, is_get_grad=False)
    flatten_params = TensorBuffer(params)

    if get_hat_params:
        assert neighbor_hat_params is not None and rank is not None
        # recover the hat_params.
        flatten_hat_params = TensorBuffer(params)
        flatten_hat_params.buffer.data[:] = neighbor_hat_params[rank].buffer
        return params, flatten_params, flatten_hat_params
    else:
        return params, flatten_params
Example #18
    def step(self, closure=None, **kargs):
        with kargs["timer"]("sync.apply_grad", epoch=self.conf.epoch_):
            utils.apply_gradient(self.param_groups,
                                 self.state,
                                 apply_grad_to_model=False)

        with kargs["timer"]("sync.get_data", epoch=self.conf.epoch_):
            # Get data.
            grads, _ = comm.get_data(self.param_groups,
                                     self.param_names,
                                     is_get_grad=True)
            grads_tb = TensorBuffer(grads)

        with kargs["timer"]("sync.use_memory", epoch=self.conf.epoch_):
            # use memory.
            grads_tb.buffer.add_(self.memory_tb.buffer)

        with kargs["timer"]("sync.compress", epoch=self.conf.epoch_):
            # compress.
            sync_buffer = self.compressor.compress(grads_tb)

        with kargs["timer"]("sync.sync", epoch=self.conf.epoch_):
            self.compressor.sync(sync_buffer)

        with kargs["timer"]("sync.update_memory", epoch=self.conf.epoch_):
            # update memory.
            self.memory_tb.buffer = (grads_tb.buffer -
                                     sync_buffer["synced_grads_tb"].buffer)

        with kargs["timer"]("sync.decompress", epoch=self.conf.epoch_):
            sync_grads_tb = self.compressor.decompress(sync_buffer)

        with kargs["timer"]("sync.apply_grad", epoch=self.conf.epoch_):
            # apply the gradient (using only the synced gradient).
            params, _ = comm.get_data(self.param_groups,
                                      self.param_names,
                                      is_get_grad=False)
            params_tb = TensorBuffer(params)

            # apply the gradient.
            params_tb.buffer.add_(-self.param_groups[0]["lr"] *
                                  sync_grads_tb.buffer)

            # unpack.
            params_tb.unpack(params)
        return sync_buffer["n_bits"]
Example #19
    def _send_model_to_master(self):
        dist.barrier()
        self.conf.logger.log(
            f"Worker-{self.conf.graph.worker_id} (client-{self.conf.graph.client_id}) sending the model ({self.arch}) back to Master."
        )
        flatten_model = TensorBuffer(list(self.model.state_dict().values()))
        dist.send(tensor=flatten_model.buffer, dst=0)
        dist.barrier()
Example #20
    def init_neighbor_hat_params(self):
        params, self.shapes = comm.get_data(self.param_groups,
                                            self.param_names,
                                            is_get_grad=False)
        flatten_params = TensorBuffer(params)

        # init the neighbor_params.
        self.neighbor_hat_params = dict()
        for rank, _ in self.neighbors_info.items():
            self.neighbor_hat_params[rank] = deepcopy(flatten_params)
Example #21
    def compress(self, sync_buffer):
        # get the values/indices for the tensor (to be transmitted).
        selected_values, selected_indices = [], []

        # compress and get compressed model.
        local_compressed_params_tb = deepcopy(sync_buffer["params_tb"])
        local_compressed_params_tb.buffer = torch.zeros_like(
            local_compressed_params_tb.buffer
        )
        for param, local_compressed_param in zip(
            sync_buffer["params_tb"], local_compressed_params_tb
        ):
            _selected_values, _selected_indices = self.compressor_fn.compress(
                param, self.comm_op, self.compress_ratio, self.is_biased
            )
            selected_values.append(_selected_values)
            selected_indices.append(_selected_indices)

            # update the local compressed params.
            local_compressed_param.data = local_compressed_param.data.view(-1)
            local_compressed_param.data[_selected_indices] = _selected_values
            local_compressed_param.data = local_compressed_param.data.view(*param.size())

        # get selected shapes.
        selected_shapes = [len(_value) for _value in selected_values]

        # flatten selected values/indices.
        flatten_selected_values = TensorBuffer(selected_values)
        flatten_selected_indices = TensorBuffer(selected_indices)

        # get n_bits to transmit.
        n_bits = get_n_bits(flatten_selected_values.buffer) + get_n_bits(
            flatten_selected_indices.buffer
        )

        # update shared dict.
        sync_buffer["selected_shapes"] = selected_shapes
        sync_buffer["flatten_selected_values"] = flatten_selected_values
        sync_buffer["flatten_selected_indices"] = flatten_selected_indices
        sync_buffer["n_bits"] = n_bits
        return local_compressed_params_tb
Example #22
def benchmark1(tensors):
    timer = CUDATimer('baseline')
    local_scale, local_sign = [], []
    with timer('compression'):
        for tensor in tensors:
            _local_scale, _local_sign = scaled_sign(tensor)
            local_scale.append(_local_scale)
            local_sign.append(_local_sign)
    with timer('flattening'):
        magnitudes_tb = TensorBuffer(local_scale)
        directions_tb = TensorBuffer(local_sign)
    with timer('com'):
        dist.all_reduce(magnitudes_tb.buffer, op=dist.ReduceOp.SUM)
        magnitudes_tb.buffer /= 2
        dist.all_reduce(directions_tb.buffer, op=dist.ReduceOp.SUM)
        directions_tb.buffer /= 2
    timer.upload_raw(
        'microbenchmarking', {
            'microbenchmark': 'sign_sgd_com',
            'input': list(map(lambda t: t.size(), tensors))
        })
Example #23
def aggregate(conf, master_model, fedavg_model, client_models,
              flatten_local_models):
    # perform the server Adam.
    # Following the setup in the paper, we use a momentum of 0.9,
    # a numerical stability constant epsilon of 0.01,
    # and beta_2 set to 0.99.
    # The suggested server_lr in the original paper is 0.1.
    fl_aggregate = conf.fl_aggregate

    assert "server_lr" in fl_aggregate
    beta_2 = fl_aggregate["beta_2"] if "beta_2" in fl_aggregate else 0.99

    # start the server momentum acceleration.
    current_model_tb = TensorBuffer(list(fedavg_model.parameters()))
    previous_model_tb = TensorBuffer(list(master_model.parameters()))

    # get the update direction.
    update = previous_model_tb.buffer - current_model_tb.buffer

    # using server momentum for the update.
    if not hasattr(conf, "second_server_momentum_buffer"):
        conf.second_server_momentum_buffer = torch.zeros_like(update)
    conf.second_server_momentum_buffer.mul_(beta_2).add_(
        (1 - beta_2) * (update**2))
    previous_model_tb.buffer.add_(
        -fl_aggregate["server_lr"] * update /
        (torch.sqrt(conf.second_server_momentum_buffer) + 0.01))

    # update the master_model (but will use the bn stats from the fedavg_model)
    master_model = fedavg_model
    _model_param = list(master_model.parameters())
    previous_model_tb.unpack(_model_param)

    # free the memory.
    torch.cuda.empty_cache()

    # a temporary hack (for debugging only).
    client_models = dict((used_client_arch, master_model.cpu())
                         for used_client_arch in conf.used_client_archs)
    return client_models
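The aggregation above is an Adam-style server update applied to the pseudo-gradient `update = previous - current` (only the second-moment term appears in this snippet). The same rule as a standalone sketch, with illustrative names:

import torch

def server_adaptive_update(prev_flat, avg_flat, state, server_lr=0.1, beta_2=0.99, eps=0.01):
    # pseudo-gradient: how far the averaged model moved away from the previous server model.
    delta = prev_flat - avg_flat
    if "v" not in state:
        state["v"] = torch.zeros_like(delta)
    # second-moment estimate: v <- beta_2 * v + (1 - beta_2) * delta^2.
    state["v"].mul_(beta_2).add_((1 - beta_2) * delta ** 2)
    # adaptive step: x <- x - lr * delta / (sqrt(v) + eps).
    return prev_flat - server_lr * delta / (state["v"].sqrt() + eps)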
Example #24
    def compress(self, sync_buffer):
        # get the sign/magnitude for the tensor (to be transmitted).
        norms, updates = [], []
        for flatten_updated_param in sync_buffer["flatten_updated_params"]:
            _update = flatten_updated_param
            updates += [_update]
            norms += [_update.norm(p=1)]

        # flatten selected values/indices.
        flatten_norms = TensorBuffer(norms)
        flatten_updates = TensorBuffer(updates)
        signs, sign_size = self.compressor_fn.compress(flatten_updates.buffer)

        # get n_bits to transmit.
        n_bits = get_n_bits(flatten_norms.buffer) + get_n_bits(signs)

        # update shared dict.
        sync_buffer["flatten_norms"] = flatten_norms
        sync_buffer["flatten_updates"] = flatten_updates
        sync_buffer["signs"] = signs
        sync_buffer["sign_size"] = sign_size
        sync_buffer["n_bits"] = n_bits
Example #25
    def _send_model_to_selected_clients(self, selected_client_ids):
        # the master_model can be large; the client_models can be small and different.
        self.conf.logger.log(f"Master sends the models to the workers.")
        for worker_rank, selected_client_id in enumerate(selected_client_ids,
                                                         start=1):
            arch = self.clientid2arch[selected_client_id]
            client_model_state_dict = self.client_models[arch].state_dict()
            flatten_model = TensorBuffer(list(
                client_model_state_dict.values()))
            dist.send(tensor=flatten_model.buffer, dst=worker_rank)
            self.conf.logger.log(
                f"\tMaster sends the current model={arch} to process_id={worker_rank}."
            )
        dist.barrier()
Example #26
    def step(self, closure=None, **kargs):
        # do the local update steps.
        with kargs["timer"]("sync/get_data", epoch=self.conf.epoch_):
            # get params.
            params, _ = comm.get_data(self.param_groups,
                                      self.param_names,
                                      is_get_grad=False)
            params_tb = TensorBuffer(params)

        with kargs["timer"]("sync/apply_grad", epoch=self.conf.epoch_):
            # prepare the gradient (sign)
            utils.apply_gradient(self.param_groups,
                                 self.state,
                                 apply_grad_to_model=False)
            # get grads.
            grads, _ = comm.get_data(self.param_groups,
                                     self.param_names,
                                     is_get_grad=True)
            grads_tb = TensorBuffer(grads)

        # enter the global sync if it satisfies the condition.
        # get the params difference w.r.t. previous synced model.
        with kargs["timer"]("sync/compress", epoch=self.conf.epoch_):
            sync_buffer = self.compressor.compress(grads_tb)

        # sync and decompress.
        with kargs["timer"]("sync/sync_and_decompress",
                            epoch=self.conf.epoch_):
            self.compressor.sync(sync_buffer)
            synced_updates_tb = self.compressor.decompress(sync_buffer)

        # unpack the synced info and update the consensus params.
        with kargs["timer"]("sync/apply_grad", epoch=self.conf.epoch_):
            params_tb.buffer -= self.param_groups[0][
                "lr"] * synced_updates_tb.buffer
            params_tb.unpack(params)
        return sync_buffer["n_bits"]
Example #27
def benchmark2(tensors):
    timer = CUDATimer('centralized_allreduce')
    local_compressed, local_scale = [], []
    with timer('compression'):
        for tensor in tensors:
            local_compressed.append(tensor.clone())
        for tensor in tensors:
            _local_scale, _local_sign = scaled_sign(tensor)
            # store local scales and local sign.
            local_scale.append(_local_scale)
    with timer('flattening'):
        magnitudes_tb = TensorBuffer(local_scale)
        #directions_tb = TensorBuffer(local_sign)
        compressed_tb = TensorBuffer(local_compressed)
    with timer('com'):
        centralized_allreduce(compressed_tb.buffer, timer)
        #print('difff after', compressed_tb.buffer - directions_tb.buffer)
        dist.all_reduce(magnitudes_tb.buffer, op=dist.ReduceOp.SUM)
        magnitudes_tb.buffer /= 2
    timer.upload_raw(
        'microbenchmarking', {
            'microbenchmark': 'sign_sgd_com',
            'input': list(map(lambda t: t.size(), tensors))
        })
Example #28
    def compress(self, sync_buffer):
        # quantize the tensor (to be transmitted).
        quantized_values = []

        for flatten_updated_param in sync_buffer["flatten_updated_params"]:
            _quantized_values = self.compressor_fn.compress(
                flatten_updated_param, self.comm_op, self.quantize_level,
                self.is_biased)
            quantized_values.append(_quantized_values)

        # flatten selected values/indices.
        flatten_updates = TensorBuffer(quantized_values)

        # get n_bits to transmit.
        n_bits = get_n_bits(flatten_updates.buffer) * self.quantize_level / 32

        # update shared dict.
        sync_buffer["flatten_updates"] = flatten_updates
        sync_buffer["n_bits"] = n_bits
Example #29
    def step(self, closure=None, **kargs):
        # Apply the gradients with the weight decay and momentum.
        with kargs["timer"]("grad.apply_grad", epoch=self.conf.epoch_):
            utils.apply_gradient(self.param_groups,
                                 self.state,
                                 apply_grad_to_model=False)

        with kargs["timer"]("grad.get_grads", epoch=self.conf.epoch_):
            params, _ = comm.get_data(self.param_groups,
                                      self.param_names,
                                      is_get_grad=False)
            flatten_params = TensorBuffer(params)

            grads, _ = comm.get_data(self.param_groups,
                                     self.param_names,
                                     is_get_grad=True)
            flatten_grads = TensorBuffer(grads)

        # Get weighted hat params and apply the local gradient.
        with kargs["timer"]("grad.apply_local_gradient",
                            epoch=self.conf.epoch_):
            flatten_half_params = deepcopy(flatten_params)
            flatten_half_params.buffer = (sum([
                _hat_params.buffer * self.neighbors_info[_rank]
                for _rank, _hat_params in self.neighbor_hat_params.items()
            ]) - self.param_groups[0]["lr"] * flatten_grads.buffer)

        # compress the model difference and sync.
        with kargs["timer"]("grad.compress", epoch=self.conf.epoch_):
            sync_buffer = {
                "original_shapes": self.shapes,
                "flatten_half_params": flatten_half_params,
                "flatten_params": flatten_params,
            }
            self.compressor.compress(sync_buffer)

        with kargs["timer"]("grad.sync", epoch=self.conf.epoch_):
            self.compressor.sync(sync_buffer)

        # finally unflatten and update local model.
        with kargs["timer"]("grad.unflatten_to_update",
                            epoch=self.conf.epoch_):
            self.compressor.uncompress(sync_buffer, self.neighbor_hat_params)
            flatten_params.buffer = self.neighbor_hat_params[
                self.rank].buffer.clone()
            flatten_params.unpack(params)
        return sync_buffer["n_bits"]
Example #30
    def compress(self, sync_buffer):
        quantized_values = []

        for half_param, hat_param in zip(sync_buffer["flatten_params"],
                                         sync_buffer["flatten_hat_params"]):
            _quantized_values = self.compressor_fn.compress(
                half_param - hat_param,
                self.comm_op,
                self.quantize_level,
                self.is_biased,
            )
            quantized_values.append(_quantized_values)

        # flatten selected values/indices.
        flatten_updates = TensorBuffer(quantized_values)

        # get n_bits to transmit.
        n_bits = get_n_bits(flatten_updates.buffer) * self.quantize_level / 32

        # update shared dict.
        sync_buffer["flatten_updates"] = flatten_updates
        sync_buffer["n_bits"] = n_bits