Example #1
        def init_data(self, use_cuda: bool) -> None:
            self.test_device = torch.device('cuda:0') if use_cuda else \
                    torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(4)

            torch.set_grad_enabled(False)
            self.cfg = BertConfig()

            self.torch_pooler = BertPooler(self.cfg)
            if torch.cuda.is_available():
                self.torch_pooler.to(self.test_device)
            self.torch_pooler.eval()

            self.turbo_pooler = turbo_transformers.BertPooler.from_torch(
                self.torch_pooler)
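For context, BertPooler itself is a very small module. The sketch below mirrors the Hugging Face implementation: it takes the hidden state of the first ([CLS]) token and passes it through a Linear + Tanh head.

import torch
from torch import nn

class BertPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # Pool the sequence by taking the hidden state of the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        return self.activation(pooled_output)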
Example #2
 def __init__(self, config):
     super().__init__(config)
     self.config = config
     self.embeddings = NeZhaEmbeddings(config)
     self.encoder = NeZhaEncoder(config)
     self.pooler = BertPooler(config)
     self.init_weights()
Example #3
 def __init__(self, config):
     super(BertModel, self).__init__(config)
     self.config = config
     self.embeddings = BertEmbeddings(config)
     self.encoder = BertEncoder(config)
     self.pooler = BertPooler(config)
     self.init_weights()
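Example #3 is the canonical embeddings → encoder → pooler wiring. Below is a simplified sketch of how the three submodules interact in forward; it is a reduction of the real BertModel.forward, with attention-mask handling and the method's many extra arguments omitted:

def forward(self, input_ids, token_type_ids=None):
    # Simplified: the real BertModel.forward also builds an extended
    # attention mask and accepts many more arguments.
    embedding_output = self.embeddings(input_ids=input_ids,
                                       token_type_ids=token_type_ids)
    encoder_outputs = self.encoder(embedding_output)
    sequence_output = encoder_outputs[0]          # (batch, seq_len, hidden)
    pooled_output = self.pooler(sequence_output)  # (batch, hidden)
    return sequence_output, pooled_output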
Example #4
 def __init__(self, bert, opt):
     super(BERT_BASE, self).__init__()
     self.bert = bert
     self.opt = opt
     self.dropout = nn.Dropout(opt.dropout)
     self.pooler = BertPooler(bert.config)
     self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
Example #5
    def __init__(self, config):
        super(BertImgModel, self).__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)  # CaptionBertEncoder(config)
        self.pooler = BertPooler(config)

        self.img_dim = config.img_feature_dim
        logger.info('BertImgModel Image Dimension: {}'.format(self.img_dim))
        self.img_feature_type = config.img_feature_type
        if hasattr(config, 'use_img_layernorm'):
            self.use_img_layernorm = config.use_img_layernorm
        else:
            self.use_img_layernorm = None

        if config.img_feature_type == 'dis_code':
            self.code_embeddings = nn.Embedding(config.code_voc, config.code_dim, padding_idx=0)
            self.img_embedding = nn.Linear(config.code_dim, self.config.hidden_size, bias=True)
        elif config.img_feature_type == 'dis_code_t': # transpose
            self.code_embeddings = nn.Embedding(config.code_voc, config.code_dim, padding_idx=0)
            self.img_embedding = nn.Linear(config.code_size, self.config.hidden_size, bias=True)
        elif config.img_feature_type == 'dis_code_scale': # scaled
            self.input_embeddings = nn.Linear(config.code_dim, config.code_size, bias=True)
            self.code_embeddings = nn.Embedding(config.code_voc, config.code_dim, padding_idx=0)
            self.img_embedding = nn.Linear(config.code_dim, self.config.hidden_size, bias=True)
        else:
            self.img_embedding = nn.Linear(self.img_dim, self.config.hidden_size, bias=True)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            if self.use_img_layernorm:
                self.LayerNorm = LayerNorm(config.hidden_size, eps=config.img_layer_norm_eps)
Example #6
    def __init__(
        self,
        config,
        visual_embedding_dim=512,
        embedding_strategy="plain",
        bypass_transformer=False,
        output_attentions=False,
        output_hidden_states=False,
    ):
        super().__init__(config)
        self.config = config

        config.visual_embedding_dim = visual_embedding_dim
        config.embedding_strategy = embedding_strategy
        config.bypass_transformer = bypass_transformer
        config.output_attentions = output_attentions
        config.output_hidden_states = output_hidden_states

        self.embeddings = BertVisioLinguisticEmbeddings(config)
        self.encoder = BertEncoderJit(config)
        self.pooler = BertPooler(config)
        self.bypass_transformer = config.bypass_transformer

        if self.bypass_transformer:
            self.additional_layer = BertLayerJit(config)

        self.output_attentions = self.config.output_attentions
        self.output_hidden_states = self.config.output_hidden_states
        self.init_weights()
Example #7
 def __init__(self, config, tokenizer, device):
     super().__init__()
     self.config = config
     self.tokenizer = tokenizer
     self.embeddings = BertEmbeddings(self.config)
     self.corrector = BertEncoder(self.config)
     self.mask_token_id = self.tokenizer.mask_token_id
     self.pooler = BertPooler(self.config)
     self.cls = BertOnlyMLMHead(self.config)
     self._device = device
Example #8
    def __init__(self, config, args):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)
        self.MAG = MAG(config, args)

        self.init_weights()
Example #9
    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)
        self.config = config

        self.embeddings = KBertEmbeddings(config)
        self.encoder = BertEncoder(config)

        self.pooler = BertPooler(config) if add_pooling_layer else None

        self.init_weights()
Example #10
    def __init__(self, config):
        super(BertImgModel, self).__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.encoder = CaptionBertEncoder(config)
        self.pooler = BertPooler(config)

        self.img_dim = config.img_feature_dim
        logger.info('BertImgModel Image Dimension: {}'.format(self.img_dim))

        # self.apply(self.init_weights)
        self.init_weights()
Example #11
 def __init__(self, count, config, num_labels):
     super(HSUM, self).__init__()
     self.count = count
     self.num_labels = num_labels
     self.pre_layers = torch.nn.ModuleList()
     self.loss_fct = torch.nn.ModuleList()
     self.pooler = BertPooler(config)
     self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
     for i in range(count):
         self.pre_layers.append(BertLayer(config))
         self.loss_fct.append(torch.nn.CrossEntropyLoss(ignore_index=-1))
Example #12
    def _build_word_embedding(self):
        self.bert_config = BertConfig.from_pretrained(self.config.bert_model_name)
        if self.config.pretrained_bert:
            bert_model = BertForPreTraining.from_pretrained(self.config.bert_model_name)
            self.word_embedding = bert_model.bert.embeddings
            self.pooler = bert_model.bert.pooler
            self.pooler.apply(self.init_weights)

        else:
            self.pooler = BertPooler(self.bert_config)
            self.word_embedding = BertEmbeddings(self.bert_config)
Example #13
File: mlp.py Project: naykun/mmf
    def __init__(self, config: Config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)

        # Head modules
        self.pooler = BertPooler(self.config)
        self.classifier = nn.Sequential(
            nn.Dropout(self.config.hidden_dropout_prob),
            BertPredictionHeadTransform(self.config),
            nn.Linear(self.config.hidden_size, self.config.num_labels),
        )
        self.num_labels = self.config.num_labels
        self.hidden_size = self.config.hidden_size
Example #14
 def __init__(self, hparams):
     super().__init__()
     self.hparams = hparams
     self.save_hyperparameters()
     config = BertConfig()
     #self.model = BertForSequenceClassification.from_pretrained(self.hparams.model_name, num_labels=self.hparams.n_class)
     self.model = AutoModel.from_pretrained(self.hparams.model_name)
     self.pooler = BertPooler(config)
     # self.attention = MultiheadedAttention(h_dim=self.hparams.h_dim, kqv_dim=self.hparams.kqv_dim, n_heads=self.hparams.n_heads)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, self.hparams.n_class)
     self.loss = nn.CrossEntropyLoss()
Example #15
 def __init__(self, bert, opt):
     super(LCF_BERT, self).__init__()
     self.bert4global = bert
     self.bert4local = copy.deepcopy(
         bert) if opt.use_dual_bert else self.bert4global
     self.opt = opt
     self.dropout = nn.Dropout(opt.dropout)
     self.bert_SA = SelfAttention(bert.config, opt)
     self.linear2 = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
     self.linear3 = nn.Linear(opt.embed_dim * 3, opt.embed_dim)
     self.bert_pooler = BertPooler(bert.config)
     self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
Example #16
    def __init__(self, bert, opt):
        super(LFC_BERT, self).__init__()

        self.bert_spc = bert
        self.opt = opt

        self.bert_local = bert
        self.dropout = nn.Dropout(opt['dropout'])
        self.bert_SA = SelfAttention(bert.config, opt)
        self.linear_double = nn.Linear(opt['bert_dim'] * 2, opt['bert_dim'])
        self.linear_single = nn.Linear(opt['bert_dim'], opt['bert_dim'])
        self.bert_pooler = BertPooler(bert.config)
        self.dense = nn.Linear(opt['bert_dim'], opt['polarities_dim'])
Example #17
    def __init__(self, bert, opt):
        super(LCF_BERT, self).__init__()

        self.bert_spc = bert
        self.opt = opt
        # self.bert_local = copy.deepcopy(bert)  # uncomment this line to use dual BERTs
        self.bert_local = bert  # default: a single BERT, which reduces memory requirements
        self.dropout = nn.Dropout(opt.dropout)
        self.bert_SA = SelfAttention(bert.config, opt)
        self.linear_double = nn.Linear(opt.bert_dim * 2, opt.bert_dim)
        self.linear_single = nn.Linear(opt.bert_dim, opt.bert_dim)
        self.bert_pooler = BertPooler(bert.config)
        self.dense = nn.Linear(opt.bert_dim, opt.polarities_dim)
Example #18
    def __init__(self, config, add_pooling_layer=True):
        # Call the init one parent class up. Otherwise, the model will be defined twice.
        BertPreTrainedModel.__init__(self, config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)

        self.pooler = BertPooler(config) if add_pooling_layer else None

        # Sparsify linear modules.
        self.sparsify_model()

        self.init_weights()
Example #19
 def __init__(self, config, num_choices=2):
     super(BertForMultipleChoiceWithMatch, self).__init__(config)
     self.num_choices = num_choices
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, 1)
     self.classifier2 = nn.Linear(2 * config.hidden_size, 1)
     self.classifier3 = nn.Linear(3 * config.hidden_size, 1)
     self.classifier4 = nn.Linear(4 * config.hidden_size, 1)
     self.classifier6 = nn.Linear(6 * config.hidden_size, 1)
     self.ssmatch = SSingleMatchNet(config)
     self.pooler = BertPooler(config)
     self.fuse = FuseNet(config)
     self.init_weights()
Example #20
    def __init__(self, config: LukeConfig):
        super(LukeModel, self).__init__()

        self.config = config

        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        if self.config.bert_model_name and "roberta" in self.config.bert_model_name:
            self.embeddings = RobertaEmbeddings(config)
            self.embeddings.token_type_embeddings.requires_grad = False
        else:
            self.embeddings = BertEmbeddings(config)
        self.entity_embeddings = EntityEmbeddings(config)
Example #21
 def __init__(self, bert, opt):
     super(SLIDE_LCF_BERT, self).__init__()
     self.bert4global = bert
     self.bert4local = copy.deepcopy(
         bert) if opt.use_dual_bert else self.bert4global
     self.opt = opt
     self.dropout = nn.Dropout(opt.dropout)
     self.encoder = Encoder(bert.config, opt)
     self.encoder_left = Encoder(bert.config, opt)
     self.encoder_right = Encoder(bert.config, opt)
     self.linear2 = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
     self.linear_window_3h = nn.Linear(opt.embed_dim * 3, opt.embed_dim)
     self.linear_window_2h = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
     self.bert_pooler = BertPooler(bert.config)
     self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
Example #22
 def __init__(self, embedding_matrix, opt):
     super(LCA_GLOVE, self).__init__()
     # Only a few of the parameters are necessary in the config.json, such as hidden_size and num_attention_heads
     self.config = BertConfig.from_json_file("modules/utils/bert_config.json")
     self.opt = opt
     self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
     self.lc_embed = nn.Embedding(2, opt.embed_dim)
     self.global_encoder1 = SelfAttention(self.config, opt)
     self.local_encoder1 = SelfAttention(self.config, opt)
     self.local_encoder2 = SelfAttention(self.config, opt)
     self.mha = SelfAttention(self.config, opt)
     self.pool = BertPooler(self.config)
     self.dropout = nn.Dropout(opt.dropout)
     self.linear = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
     self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
     self.classifier = nn.Linear(opt.embed_dim, 2)
Example #23
    def __init__(
            self,
            pretrained_model: str = 'bert-large-uncased',
            num_choices: int = 4,
            learning_rate: float = 2e-5,
            gradient_accumulation_steps: int = 1,
            num_train_epochs: float = 3.0,
            train_batch_size: int = 32,
            warmup_proportion: float = 0.1,
            train_all: bool = False,
            use_bert_adam: bool = True,
    ):
        super().__init__()
        self.config = BertConfig.from_pretrained(pretrained_model, num_choices=4)
        self.bert = BertModel.from_pretrained(pretrained_model, config=self.config)
        self.num_choices = num_choices
        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
        self.classifier = nn.Linear(3 * self.config.hidden_size, 1)
        self.ssmatch = SSingleMatchNet(self.config)
        self.pooler = BertPooler(self.config)
        self.fuse = FuseNet(self.config)

        if not train_all:
            for param in self.bert.parameters():
                param.requires_grad = False
            for param in self.bert.pooler.parameters():
                param.requires_grad = True
            # for param in self.bert.encoder.layer[15:24].parameters():
            #     param.requires_grad = True
            # for param in self.bert.encoder.layer[15].output.parameters():
            #     param.requires_grad = True

        # print model layers and config
        print(self.config)
        for name, params in self.named_parameters():
            print('-->name:', name, '-->grad_require:', params.requires_grad)

        self.learning_rate = learning_rate
        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.num_train_epochs = num_train_epochs
        self.train_batch_size = train_batch_size
        self.warmup_proportion = warmup_proportion
        self.use_bert_adam = use_bert_adam

        self.warmup_steps = 0
        self.total_steps = 0
Example #24
 def __init__(self, embedding_matrix, opt):
     super(LCF_GLOVE, self).__init__()
     self.config = BertConfig.from_json_file(
         "modules/utils/bert_config.json")
     self.opt = opt
     self.embed = nn.Embedding.from_pretrained(
         torch.tensor(embedding_matrix, dtype=torch.float))
     self.mha_global = SelfAttention(self.config, opt)
     self.mha_local = SelfAttention(self.config, opt)
     self.ffn_global = PositionwiseFeedForward(self.opt.embed_dim,
                                               dropout=self.opt.dropout)
     self.ffn_local = PositionwiseFeedForward(self.opt.embed_dim,
                                              dropout=self.opt.dropout)
     self.mha_local_SA = SelfAttention(self.config, opt)
     self.mha_global_SA = SelfAttention(self.config, opt)
     self.pool = BertPooler(self.config)
     self.dropout = nn.Dropout(opt.dropout)
     self.linear = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
     self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
Example #25
    def __init__(self, config, visual_embedding_dim):
        super().__init__()

        # Attributes
        self.config = config
        self.config.visual_embedding_dim = visual_embedding_dim
        self.num_labels = config.num_labels

        # Build Bert
        self.embeddings = BertVisioLinguisticEmbeddings(self.config)
        self.encoder = BertEncoder(self.config)
        self.pooler = BertPooler(self.config)

        # Add classification head
        # Added sigmoid activation to smooth the output
        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
        self.classifier = nn.Sequential(
            BertPredictionHeadTransform(self.config),
            nn.Linear(self.config.hidden_size, self.num_labels), nn.Sigmoid())

        self.init_weights()
Example #26
 def __init__(self, bert_base_model, args):
     super(LCF_ATEPC, self).__init__(config=bert_base_model.config)
     config = bert_base_model.config
     self.bert_for_global_context = bert_base_model
     self.args = args
     # do not init the LCF layer if BERT-SPC or BERT-BASE is specified
     # if self.args.local_context_focus in {'cdw', 'cdm', 'fusion'}:
     if not self.args.use_unique_bert:
         self.bert_for_local_context = copy.deepcopy(self.bert_for_global_context)
     else:
         self.bert_for_local_context = self.bert_for_global_context
     self.pooler = BertPooler(config)
     if args.dataset in {'camera', 'car', 'phone', 'notebook'}:
         self.dense = torch.nn.Linear(768, 2)
     else:
         self.dense = torch.nn.Linear(768, 3)
     self.bert_global_focus = self.bert_for_global_context
     self.dropout = nn.Dropout(self.args.dropout)
     self.SA1 = SelfAttention(config, args)
     self.SA2 = SelfAttention(config, args)
     self.linear_double = nn.Linear(768 * 2, 768)
     self.linear_triple = nn.Linear(768 * 3, 768)
Example #27
def convert_checkpoint_to_pytorch(tf_checkpoint_path: str, config_path: str,
                                  pytorch_dump_path: str):
    def get_masked_lm_array(name: str):
        full_name = f"masked_lm/{name}/.ATTRIBUTES/VARIABLE_VALUE"
        array = tf.train.load_variable(tf_checkpoint_path, full_name)

        if "kernel" in name:
            array = array.transpose()

        return torch.from_numpy(array)

    def get_encoder_array(name: str):
        full_name = f"encoder/{name}/.ATTRIBUTES/VARIABLE_VALUE"
        array = tf.train.load_variable(tf_checkpoint_path, full_name)

        if "kernel" in name:
            array = array.transpose()

        return torch.from_numpy(array)

    def get_encoder_layer_array(layer_index: int, name: str):
        full_name = f"encoder/_transformer_layers/{layer_index}/{name}/.ATTRIBUTES/VARIABLE_VALUE"
        array = tf.train.load_variable(tf_checkpoint_path, full_name)

        if "kernel" in name:
            array = array.transpose()

        return torch.from_numpy(array)

    def get_encoder_attention_layer_array(layer_index: int, name: str,
                                          original_shape):
        full_name = f"encoder/_transformer_layers/{layer_index}/_attention_layer/{name}/.ATTRIBUTES/VARIABLE_VALUE"
        array = tf.train.load_variable(tf_checkpoint_path, full_name)
        array = array.reshape(original_shape)

        if "kernel" in name:
            array = array.transpose()

        return torch.from_numpy(array)

    print(f"Loading model based on config from {config_path}...")
    config = BertConfig.from_json_file(config_path)
    model = BertForMaskedLM(config)

    # Layers
    for layer_index in range(0, config.num_hidden_layers):
        layer: BertLayer = model.bert.encoder.layer[layer_index]

        # Self-attention
        self_attn: BertSelfAttention = layer.attention.self

        self_attn.query.weight.data = get_encoder_attention_layer_array(
            layer_index, "_query_dense/kernel",
            self_attn.query.weight.data.shape)
        self_attn.query.bias.data = get_encoder_attention_layer_array(
            layer_index, "_query_dense/bias", self_attn.query.bias.data.shape)
        self_attn.key.weight.data = get_encoder_attention_layer_array(
            layer_index, "_key_dense/kernel", self_attn.key.weight.data.shape)
        self_attn.key.bias.data = get_encoder_attention_layer_array(
            layer_index, "_key_dense/bias", self_attn.key.bias.data.shape)
        self_attn.value.weight.data = get_encoder_attention_layer_array(
            layer_index, "_value_dense/kernel",
            self_attn.value.weight.data.shape)
        self_attn.value.bias.data = get_encoder_attention_layer_array(
            layer_index, "_value_dense/bias", self_attn.value.bias.data.shape)

        # Self-attention Output
        self_output: BertSelfOutput = layer.attention.output

        self_output.dense.weight.data = get_encoder_attention_layer_array(
            layer_index, "_output_dense/kernel",
            self_output.dense.weight.data.shape)
        self_output.dense.bias.data = get_encoder_attention_layer_array(
            layer_index, "_output_dense/bias",
            self_output.dense.bias.data.shape)

        self_output.LayerNorm.weight.data = get_encoder_layer_array(
            layer_index, "_attention_layer_norm/gamma")
        self_output.LayerNorm.bias.data = get_encoder_layer_array(
            layer_index, "_attention_layer_norm/beta")

        # Intermediate
        intermediate: BertIntermediate = layer.intermediate

        intermediate.dense.weight.data = get_encoder_layer_array(
            layer_index, "_intermediate_dense/kernel")
        intermediate.dense.bias.data = get_encoder_layer_array(
            layer_index, "_intermediate_dense/bias")

        # Output
        bert_output: BertOutput = layer.output

        bert_output.dense.weight.data = get_encoder_layer_array(
            layer_index, "_output_dense/kernel")
        bert_output.dense.bias.data = get_encoder_layer_array(
            layer_index, "_output_dense/bias")

        bert_output.LayerNorm.weight.data = get_encoder_layer_array(
            layer_index, "_output_layer_norm/gamma")
        bert_output.LayerNorm.bias.data = get_encoder_layer_array(
            layer_index, "_output_layer_norm/beta")

    # Embeddings
    model.bert.embeddings.position_embeddings.weight.data = get_encoder_array(
        "_position_embedding_layer/embeddings")
    model.bert.embeddings.token_type_embeddings.weight.data = get_encoder_array(
        "_type_embedding_layer/embeddings")
    model.bert.embeddings.LayerNorm.weight.data = get_encoder_array(
        "_embedding_norm_layer/gamma")
    model.bert.embeddings.LayerNorm.bias.data = get_encoder_array(
        "_embedding_norm_layer/beta")

    # LM Head
    lm_head = model.cls.predictions.transform

    lm_head.dense.weight.data = get_masked_lm_array("dense/kernel")
    lm_head.dense.bias.data = get_masked_lm_array("dense/bias")

    lm_head.LayerNorm.weight.data = get_masked_lm_array("layer_norm/gamma")
    lm_head.LayerNorm.bias.data = get_masked_lm_array("layer_norm/beta")

    model.bert.embeddings.word_embeddings.weight.data = get_masked_lm_array(
        "embedding_table")

    # Pooling
    model.bert.pooler = BertPooler(config=config)
    model.bert.pooler.dense.weight.data = get_encoder_array(
        "_pooler_layer/kernel")
    model.bert.pooler.dense.bias.data = get_encoder_array(
        "_pooler_layer/bias")

    # Export final model
    model.save_pretrained(pytorch_dump_path)

    # Integration test - should load without any errors ;)
    new_model = BertForMaskedLM.from_pretrained(pytorch_dump_path)
    print(new_model.eval())

    print("Model conversion was done sucessfully!")
Example #28
    class TestBertPooler(unittest.TestCase):
        def init_data(self, use_cuda: bool) -> None:
            self.test_device = torch.device('cuda:0') if use_cuda else \
                    torch.device('cpu:0')
            if not use_cuda:
                torch.set_num_threads(4)

            torch.set_grad_enabled(False)
            self.cfg = BertConfig()

            self.torch_pooler = BertPooler(self.cfg)
            if torch.cuda.is_available():
                self.torch_pooler.to(self.test_device)
            self.torch_pooler.eval()

            self.turbo_pooler = turbo_transformers.BertPooler.from_torch(
                self.torch_pooler)

        def check_torch_and_turbo(self, use_cuda):
            self.init_data(use_cuda=use_cuda)
            device = "GPU" if use_cuda else "CPU"

            num_iter = 2
            hidden_size = self.cfg.hidden_size
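            # NOTE: batch_size is not defined in this snippet; the original
            # test file sets it at module scope.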
            input_tensor = torch.rand(size=(batch_size, 1, hidden_size),
                                      dtype=torch.float32,
                                      device=self.test_device)

            torch_model = lambda: self.torch_pooler(input_tensor)
            torch_result, torch_qps, torch_time = \
                test_helper.run_model(torch_model, use_cuda, num_iter)
            print(f"BertPooler \"({batch_size},{hidden_size:03})\" ",
                  f"{device} Torch QPS,  {torch_qps}, time, {torch_time}")

            turbo_model = lambda: self.turbo_pooler(
                input_tensor.reshape((batch_size, hidden_size)))
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)

            print(
                f"BertPooler \"({batch_size}, {hidden_size}\" ",
                f"{device} TurboTransform QPS,  {turbo_qps}, time, {turbo_time}"
            )

            torch_result = torch_result.cpu().numpy()
            turbo_result = turbo_result.cpu().numpy()

            self.assertTrue(
                numpy.allclose(torch_result,
                               turbo_result,
                               rtol=1e-4,
                               atol=1e-3))

            with open("bert_pooler_res.txt", "a") as fh:
                fh.write(
                    f"\"({batch_size},{hidden_size:03})\", {torch_qps}, {torch_qps}\n"
                )

        def test_pooler(self):
            self.check_torch_and_turbo(use_cuda=False)
            if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
                self.check_torch_and_turbo(use_cuda=True)
Example #29
 def __init__(self, config):
     super().__init__()
     self.pooler = BertPooler(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
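Example #29 defines only __init__. Below is a runnable sketch with an assumed class name and an assumed forward; both are illustrations, not from the source:

import torch
from torch import nn
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertPooler

class PoolerClassificationHead(nn.Module):  # hypothetical name
    def __init__(self, config):
        super().__init__()
        self.pooler = BertPooler(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, hidden_states):
        # Pool the encoder output, regularize, and project to label logits.
        return self.classifier(self.dropout(self.pooler(hidden_states)))

config = BertConfig(num_labels=3)  # the num_labels value is an assumption
head = PoolerClassificationHead(config)
logits = head(torch.rand(2, 16, config.hidden_size))
print(logits.shape)  # torch.Size([2, 3])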