Example #1
    def __init__(self,
                 size,
                 num_partitions=1,
                 gather_out=True,
                 param_attr=None,
                 bias_attr=None,
                 name=None):
        super().__init__()

        if in_dygraph_mode():
            rank = paddle.distributed.get_rank()
            nranks = paddle.distributed.get_world_size()
        else:
            assert fleet._role_maker, ("To use paddle.distributed.split, "
                                       "you must call fleet.init() first.")
            rank = fleet.worker_index()
            nranks = fleet.worker_num()

        # rank within a model parallel group
        inner_rank = rank % num_partitions
        self.gather_out = gather_out

        assert size[1] % num_partitions == 0, (
            "Number of column of the weight for linear ({}) must be"
            " divisible by num_partitions ({})".format(size[1],
                                                       num_partitions))
        self.per_part_size = size[1] // num_partitions
        linear_size = (size[0], self.per_part_size)

        num_rows, num_cols = linear_size

        if not name:
            name = "fc_by_col_rank_%d" % inner_rank
        else:
            name = name + "_by_col_rank_%d" % inner_rank

        self.linear = paddle.nn.Linear(num_rows,
                                       num_cols,
                                       weight_attr=param_attr,
                                       bias_attr=bias_attr,
                                       name=name)

        weight = self.linear.weight
        weight.is_distributed = True
        # alias for weight tensor
        self.weight = self.linear.weight

        startup_block = paddle.static.default_startup_program().global_block()
        main_block = paddle.static.default_main_program().global_block()
        startup_block.vars[weight.name].is_distributed = True
        main_block.vars[weight.name].is_distributed = True
        # set is_distributed for the split bias
        # if a linear layer is split by column, its bias is also split across ranks, like the weight
        if self.linear._bias_attr is not False:
            startup_block.vars[self.linear.bias.name].is_distributed = True
            main_block.vars[self.linear.bias.name].is_distributed = True
            self.bias = self.linear.bias
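
A hedged usage sketch (not part of the source): the assertion above mentions paddle.distributed.split, and calling it with operation='linear' and axis=1 splits the weight by columns in the same way as this constructor. The shapes, the two-rank launch command, and the variable names below are illustrative assumptions.

# Illustrative only; run under the launcher, e.g.
#   python -m paddle.distributed.launch --gpus 0,1 demo.py
import paddle
import paddle.distributed.fleet as fleet

paddle.enable_static()
paddle.set_device('gpu:%d' % paddle.distributed.ParallelEnv().dev_id)
fleet.init(is_collective=True)

x = paddle.randn([10, 32])                     # replicated input on every rank
y = paddle.distributed.split(x,
                             size=(32, 64),    # full weight shape (in, out)
                             operation='linear',
                             axis=1,           # split the weight by columns
                             num_partitions=2,
                             gather_out=True)  # concat the per-rank partial outputs
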
Example #2
    def __init__(self,
                 size,
                 num_partitions=1,
                 input_is_parallel=False,
                 param_attr=None,
                 bias_attr=None,
                 name=None):
        super().__init__()

        if in_dygraph_mode():
            rank = paddle.distributed.get_rank()
            nranks = paddle.distributed.get_world_size()
        else:
            assert fleet._role_maker, ("To use paddle.distributed.split, "
                                       "you must call fleet.init() first.")
            rank = fleet.worker_index()
            nranks = fleet.worker_num()

        # rank within a model parallel group
        inner_rank = rank % num_partitions
        self.input_is_parallel = input_is_parallel

        assert size[0] % num_partitions == 0, (
            "Number of rows of the weight for linear ({}) must be"
            " divisible by num_partitions ({})".format(size[0],
                                                       num_partitions))
        self.per_part_size = size[0] // num_partitions
        linear_size = (self.per_part_size, size[1])

        num_rows, num_cols = linear_size

        if not name:
            name = "fc_by_row_rank_%d" % inner_rank
        else:
            name = name + "_by_row_rank_%d" % inner_rank
        self.linear = paddle.nn.Linear(
            num_rows,
            num_cols,
            weight_attr=param_attr,
            # NOTE(wangxi): row split, the bias must be added after the allreduce
            bias_attr=False,
            name=name)

        weight = self.linear.weight
        weight.is_distributed = True
        # alias for weight tensor
        self.weight = self.linear.weight

        startup_block = paddle.static.default_startup_program().global_block()
        main_block = paddle.static.default_main_program().global_block()
        startup_block.vars[weight.name].is_distributed = True
        main_block.vars[weight.name].is_distributed = True
        # unlike the column-split case, the bias is not marked as distributed here:
        # if a linear layer is split by row, each rank holds the complete bias,
        # which is created below and added after the all-reduce of the partial outputs

        if bias_attr is not False:
            self.bias = self.create_parameter(shape=[num_cols],
                                              attr=bias_attr,
                                              dtype=self._dtype,
                                              is_bias=True)
        else:
            self.bias = None
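
A minimal sketch (not from the source) of the forward pattern that the NOTE about adding the bias after the allreduce refers to: each rank multiplies its input slice by its weight shard, the partial outputs are summed with an all_reduce, and the complete bias is added once afterwards. The function name and tensor shapes are assumptions for illustration.

import paddle
import paddle.distributed as dist

def row_parallel_forward(x_local, weight_local, bias):
    # x_local:      [batch, in_features // num_partitions], this rank's input slice
    # weight_local: [in_features // num_partitions, out_features], this rank's weight shard
    # bias:         [out_features] or None, held completely on every rank
    partial = paddle.matmul(x_local, weight_local)  # partial output on this rank
    dist.all_reduce(partial)                        # sum the partial outputs across the group
    if bias is not None:
        partial = partial + bias                    # add the full bias after the all-reduce
    return partial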