Example #1
def __init__(self, config):
    super().__init__()
    # Prunable projection from the intermediate size back to the hidden size.
    self.dense = MaskedLinear(
        config.intermediate_size,
        config.hidden_size,
        pruning_method=config.pruning_method,
        mask_init=config.mask_init,
        mask_scale=config.mask_scale,
    )
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
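Example #1 is the constructor of a masked BERT output sublayer. For context, here is a minimal sketch of the matching forward pass, assuming the standard BertOutput control flow (projection, dropout, then LayerNorm over the residual); the masked variants typically also thread a pruning threshold into the MaskedLinear call:

def forward(self, hidden_states, input_tensor):
    hidden_states = self.dense(hidden_states)    # prunable intermediate -> hidden projection
    hidden_states = self.dropout(hidden_states)
    # Residual connection followed by layer normalization.
    return self.LayerNorm(hidden_states + input_tensor)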
Example #2
def __init__(self, config):
    super().__init__()
    # Prunable expansion from the hidden size to the intermediate size.
    self.dense = MaskedLinear(
        config.hidden_size,
        config.intermediate_size,
        pruning_method=config.pruning_method,
        mask_init=config.mask_init,
        mask_scale=config.mask_scale,
    )
    # The activation may be given as a name (resolved via ACT2FN) or as a callable.
    if isinstance(config.hidden_act, str):
        self.intermediate_act_fn = ACT2FN[config.hidden_act]
    else:
        self.intermediate_act_fn = config.hidden_act
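In transformers, ACT2FN is a dictionary mapping activation names to the corresponding functions. A minimal stand-in covering only the common cases might look like this (the real table contains more entries):

import torch
import torch.nn.functional as F

# Minimal stand-in for transformers' ACT2FN lookup table.
ACT2FN = {"gelu": F.gelu, "relu": F.relu, "tanh": torch.tanh}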
Example #3
def __init__(self, config):
    super().__init__()
    if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
            config, "embedding_size"):
        raise ValueError(
            f"The hidden size ({config.hidden_size}) is not a multiple of "
            f"the number of attention heads ({config.num_attention_heads})"
        )
    self.output_attentions = config.output_attentions

    self.num_attention_heads = config.num_attention_heads
    self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
    self.all_head_size = self.num_attention_heads * self.attention_head_size

    # Prunable query/key/value projections, each mapping hidden_size -> all_head_size.
    self.query = MaskedLinear(
        config.hidden_size,
        self.all_head_size,
        pruning_method=config.pruning_method,
        mask_init=config.mask_init,
        mask_scale=config.mask_scale,
    )
    self.key = MaskedLinear(
        config.hidden_size,
        self.all_head_size,
        pruning_method=config.pruning_method,
        mask_init=config.mask_init,
        mask_scale=config.mask_scale,
    )
    self.value = MaskedLinear(
        config.hidden_size,
        self.all_head_size,
        pruning_method=config.pruning_method,
        mask_init=config.mask_init,
        mask_scale=config.mask_scale,
    )

    self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
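Each of the query/key/value projections emits a flat (batch, seq_len, all_head_size) tensor; attention then splits the last dimension into num_attention_heads slices of attention_head_size. A sketch of the transpose_for_scores helper that performs this reshaping in the stock BertSelfAttention:

def transpose_for_scores(self, x):
    # (batch, seq_len, all_head_size) -> (batch, num_heads, seq_len, head_size)
    new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
    x = x.view(*new_x_shape)
    return x.permute(0, 2, 1, 3)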
def create_masked_linear(in_features, out_features, config, bias=True):
    # Build a MaskedLinear with all pruning/shuffling options taken from the config.
    return MaskedLinear(
        in_features=in_features,
        out_features=out_features,
        bias=bias,
        pruning_method=config.pruning_method,
        mask_init=config.mask_init,
        mask_scale=config.mask_scale,
        mask_block_rows=config.mask_block_rows,
        mask_block_cols=config.mask_block_cols,
        ampere_pruning_method=config.ampere_pruning_method,
        ampere_mask_init=config.ampere_mask_init,
        ampere_mask_scale=config.ampere_mask_scale,
        shuffling_method=config.shuffling_method,
        in_shuffling_group=config.in_shuffling_group,
        out_shuffling_group=config.out_shuffling_group,
    )
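A usage sketch of create_masked_linear: the config below is a hypothetical stand-in built with SimpleNamespace, and every field value is an illustrative assumption rather than a verified default of the MaskedLinear implementation.

from types import SimpleNamespace

# Hypothetical config object; all values are illustrative assumptions.
config = SimpleNamespace(
    pruning_method="topK",
    mask_init="constant",
    mask_scale=0.0,
    mask_block_rows=32,
    mask_block_cols=32,
    ampere_pruning_method="disabled",
    ampere_mask_init="constant",
    ampere_mask_scale=0.0,
    shuffling_method="disabled",
    in_shuffling_group=4,
    out_shuffling_group=4,
)

# 768 -> 3072 matches a BERT-base intermediate projection.
layer = create_masked_linear(768, 3072, config)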