Example #1
    def __init__(self, config):
        super().__init__()

        self._temperature = config.temperature
        self._pad_token_id = config.pad_token_id

        self.layer_norm = nn.LayerNorm(config.embedding_size)
        self.dense = nn.Linear(config.hidden_size, config.embedding_size)
        self.activation = get_activation(config.hidden_act)
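This example only shows the constructor; a minimal forward pass wiring these layers together could look like the sketch below (the method and the dense, then activation, then layer_norm ordering are assumptions based on the attributes defined above, not part of the original snippet):

    def forward(self, hidden_states):
        # hypothetical: project to the embedding size, apply the configured
        # activation, then normalize
        hidden_states = self.dense(hidden_states)
        hidden_states = self.activation(hidden_states)
        hidden_states = self.layer_norm(hidden_states)
        return hidden_states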
Example #2
 def forward(self, features, **kwargs):
     x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
     x = self.dropout1(x)
     x = self.dense(x)
     x = get_activation("gelu")(
         x
     )  # although BERT uses tanh here, it seems Electra authors used gelu here
     x = self.dropout2(x)
     x = self.out_proj(x)
     return x
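The forward above relies on dropout1, dense, dropout2, and out_proj being defined elsewhere in the class; a plausible constructor, with sizes that are assumptions rather than part of the original example, would be:

    def __init__(self, config):
        super().__init__()
        # hypothetical layer definitions matching the attributes used in forward
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout1 = nn.Dropout(config.hidden_dropout_prob)
        self.dropout2 = nn.Dropout(config.hidden_dropout_prob)
        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)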
Example #3
 def call_adapter(self, inputs, adapter_weights):
     """Computes the output of the adapter layers."""
     down = F.linear(inputs,
                     weight=adapter_weights.down.weight,
                     bias=adapter_weights.down.bias)
     middle = get_activation(self.activation_type)(down)
     output = F.linear(middle,
                       weight=adapter_weights.up.weight,
                       bias=adapter_weights.up.bias)
     return output
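call_adapter only needs an object whose down and up attributes expose weight and bias tensors; a minimal, self-contained way to exercise it in isolation, with made-up names and sizes, is sketched below:

    import torch
    import torch.nn as nn

    class AdapterWeights(nn.Module):
        # hypothetical container: a down-projection followed by an up-projection
        def __init__(self, hidden_size, bottleneck_size):
            super().__init__()
            self.down = nn.Linear(hidden_size, bottleneck_size)
            self.up = nn.Linear(bottleneck_size, hidden_size)

    weights = AdapterWeights(hidden_size=768, bottleneck_size=64)
    inputs = torch.randn(2, 16, 768)  # (batch, seq_len, hidden)
    # adapter_layer.call_adapter(inputs, weights) would return a tensor of the same shape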
Example #4
    def forward(self, *args, **kwargs):
        x = self.pretrained_model(
            *args, **kwargs)[0][:, 0, :]  # take <s> token (equiv. to [CLS])
        x = self.d1(x)
        x = self.l1(x)
        x = self.bn1(x)
        x = get_activation("gelu")(x)
        x = self.d2(x)
        x = self.l2(x)

        return x
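Here the attributes pretrained_model, d1, l1, bn1, d2, and l2 come from the surrounding class; a sketch of what that constructor might declare (hidden size and label count are assumptions) is:

    def __init__(self, pretrained_model, hidden_size=768, num_labels=2, dropout=0.1):
        super().__init__()
        # hypothetical layers matching the attribute names used in forward
        self.pretrained_model = pretrained_model
        self.d1 = nn.Dropout(dropout)
        self.l1 = nn.Linear(hidden_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.d2 = nn.Dropout(dropout)
        self.l2 = nn.Linear(hidden_size, num_labels)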
Example #5
    def forward(self, discriminator_hidden_states, attention_mask, labels):
        hidden_states = self.dense(discriminator_hidden_states)
        hidden_states = get_activation(self.config.hidden_act)(hidden_states)

        logits = self.dense_prediction(hidden_states).squeeze_()
        probs = torch.nn.Sigmoid()(logits)
        preds = torch.round((logits.sign() + 1) / 2)

        loss_fct = nn.BCEWithLogitsLoss()
        loss = loss_fct(logits.view(-1, discriminator_hidden_states.shape[1]),
                        labels.float())

        return probs, preds, loss
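Note that the loss is computed per token: the logits are reshaped to (batch_size, sequence_length) and compared against float labels of the same shape. A standalone sketch of just that loss computation, with dummy shapes chosen for illustration, is:

    import torch
    import torch.nn as nn

    batch_size, seq_len = 2, 8
    logits = torch.randn(batch_size, seq_len)             # token-level discriminator scores
    labels = torch.randint(0, 2, (batch_size, seq_len))   # 1 = replaced token, 0 = original
    loss = nn.BCEWithLogitsLoss()(logits.view(-1, seq_len), labels.float())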
Example #6
    def __init__(
        self,
        web_hidden_size,
        linear_hidden=1536,
        dropout=0.1,
        activation_string="gelu",
    ):
        super(LinearClassifier, self).__init__()
        self.dropout1 = nn.Dropout(dropout) if dropout else nn.Identity()
        self.linear1 = nn.Linear(web_hidden_size, linear_hidden)
        self.linear2 = nn.Linear(linear_hidden, 1)
        # self.sigmoid = nn.Sigmoid()

        # support older versions of huggingface/transformers
        if activation_string == "gelu":
            self.activation = nn.GELU()
        else:
            self.activation = (get_activation(activation_string)
                               if activation_string else nn.Identity())
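The constructor above defines dropout1, linear1, linear2, and an activation but no forward; one way to combine them, shown purely as a sketch (the final squeeze is an assumption about the expected output shape), is:

    def forward(self, x):
        x = self.dropout1(x)
        x = self.activation(self.linear1(x))
        x = self.linear2(x)
        return x.squeeze(-1)  # raw scores; the commented-out sigmoid could be applied here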
Example #7
    def forward(self, generator_hidden_states):
        hidden_states = self.dense(generator_hidden_states)
        hidden_states = get_activation("gelu")(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)

        return hidden_states
Example #8
    def forward(self, discriminator_hidden_states, attention_mask):
        hidden_states = self.dense(discriminator_hidden_states)
        hidden_states = get_activation(self.config.hidden_act)(hidden_states)
        logits = self.dense_prediction(hidden_states).squeeze()

        return logits
Example #9
 def test_get_activation(self):
     get_activation("swish")
     get_activation("silu")
     get_activation("relu")
     get_activation("tanh")
     get_activation("gelu_new")
     get_activation("gelu_fast")
     get_activation("gelu_python")
     get_activation("quick_gelu")
     get_activation("mish")
     get_activation("linear")
     get_activation("sigmoid")
     with self.assertRaises(KeyError):
         get_activation("bogus")
     with self.assertRaises(KeyError):
         get_activation(None)
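For reference, get_activation simply maps a string to a callable, so the returned object can be applied directly to a tensor. A minimal usage example, assuming a reasonably recent transformers version:

    import torch
    from transformers.activations import get_activation

    act = get_activation("gelu")      # returns a callable activation (e.g. an nn.Module)
    out = act(torch.randn(2, 4))      # applied element-wise like any activation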
Example #10
 def test_gelu_versions(self):
     x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
     torch_builtin = get_activation("gelu")
     self.assertTrue(torch.allclose(gelu_python(x), torch_builtin(x)))
     self.assertFalse(torch.allclose(gelu_python(x), gelu_new(x)))
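The two variants compared above differ only in how GELU is evaluated: the Python/torch version uses the exact erf-based formula, while gelu_new uses the tanh approximation. Both closed forms, written out for reference (these are the standard definitions, not code from the example):

    import math
    import torch

    def gelu_exact(x):
        # exact GELU: x * Phi(x), matching gelu_python and torch's default
        return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))

    def gelu_tanh_approx(x):
        # tanh approximation used by "gelu_new"; close to, but not equal to, the exact form
        return 0.5 * x * (1.0 + torch.tanh(
            math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))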
Example #11
 def __init__(self, activation_type):
     super().__init__()
     self.f = get_activation(activation_type)
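This wrapper stores the resolved activation as self.f; its forward (not shown in the example) would presumably just apply it:

    def forward(self, x):
        # assumed behaviour: apply the stored activation element-wise
        return self.f(x)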
Example #12
 def forward(self, discriminator_hidden_states):
     hidden_states = self.dense(discriminator_hidden_states)
     hidden_states = get_activation(self.config.hidden_act)(hidden_states)
     logits = self.dense_prediction(hidden_states)
     return logits
Example #13
    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                output_attentions=None,
                entity_token_ids=None):
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape,
                                         dtype=torch.long,
                                         device=device)

        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask, input_shape, device)
        head_mask = self.get_head_mask(head_mask,
                                       self.config.num_hidden_layers)

        hidden_states = self.embeddings(input_ids=input_ids,
                                        position_ids=position_ids,
                                        token_type_ids=token_type_ids,
                                        inputs_embeds=inputs_embeds)

        if hasattr(self, "embeddings_project"):
            hidden_states = self.embeddings_project(hidden_states)

        hidden_states = self.encoder(hidden_states,
                                     attention_mask=extended_attention_mask,
                                     head_mask=head_mask
                                     # output_attentions=output_attentions,
                                     )
        sequence_output = hidden_states[0]
        batch_size = sequence_output.shape[0]

        batch_embedding = []

        for i in range(batch_size):
            entity_embedding = sequence_output[i][entity_token_ids[i]]
            batch_embedding.append(entity_embedding.tolist())

        batch_embedding = torch.tensor(batch_embedding).cuda()
        sequence_output_cls = batch_embedding
        x = self.dropout(sequence_output_cls)
        x = self.dense(x)
        x = get_activation("gelu")(
            x
        )  # although BERT uses tanh here, it seems Electra authors used gelu here
        x = self.dropout(x)
        x = self.out_proj(x)
        return x
Example #14
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        used_entity_token=False,
        masked_entities_list=None,
        chemical_code_list=None,
        disease_code_list=None,
        is_full_sample=False,
        label_length=0,
    ):

        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape,
                                         dtype=torch.long,
                                         device=device)

        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask, input_shape, device)
        head_mask = self.get_head_mask(head_mask,
                                       self.config.num_hidden_layers)

        hidden_states = self.embeddings(input_ids=input_ids,
                                        position_ids=position_ids,
                                        token_type_ids=token_type_ids,
                                        inputs_embeds=inputs_embeds)

        if hasattr(self, "embeddings_project"):
            hidden_states = self.embeddings_project(hidden_states)

        hidden_states = self.encoder(
            hidden_states,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            # output_attentions=output_attentions,
        )
        sequence_output = hidden_states[0]

        batch_size = chemical_code_list.shape[0]

        def get_entity_embedding(token_embedding, masked_entities, code):
            embedding = None
            for i, mask in enumerate(masked_entities):
                if mask == code:
                    embedding = token_embedding[i]
                    break
            return embedding

        def get_all_entity_embedding(token_embedding, masked_entities, code):
            embedding_size = list(token_embedding.size())[-1]
            embedding = []
            prev_idx = None
            for i, mask in enumerate(masked_entities):
                if mask == code:
                    # keep only the first token of each contiguous entity span
                    if prev_idx is None or i != prev_idx + 1:
                        embedding.append(token_embedding[i])
                    prev_idx = i
            if len(embedding) == 0:
                # fall back to a zero vector on the same device as the encoder output
                embedding = [
                    torch.zeros(embedding_size, device=token_embedding.device)
                ]
            embedding = torch.stack(embedding)
            return embedding

        def generate_code_pairs_list(chemical_code_list_encoded,
                                     disease_code_list_encoded, label_len):
            chemical_codes = []
            disease_codes = []
            chemical_code_size = list(chemical_code_list_encoded.size())
            disease_code_size = list(disease_code_list_encoded.size())
            tensor_size = chemical_code_size[0] * disease_code_size[0]
            for i in range(chemical_code_size[0]):
                if chemical_code_list_encoded[i] == -1:
                    break
                for j in range(disease_code_size[0]):
                    if disease_code_list_encoded[j] == -1:
                        break
                    chemical_codes.append(chemical_code_list_encoded[i])
                    disease_codes.append(disease_code_list_encoded[j])
            for i in range(len(chemical_codes), label_len):
                chemical_codes.append(-1)
                disease_codes.append(-1)
            return chemical_codes, disease_codes

        # def get_entity_embedding(token_embedding, masked_entities, code):
        #     count = 0
        #     embedding = torch.zeros(token_embedding.shape[1]).cuda()
        #     check = True
        #     for i, mask in enumerate(masked_entities):
        #         if mask == code:
        #             if check:
        #                 count += 1
        #                 check = False
        #             embedding += token_embedding[i]
        #         else:
        #             check = True

        #     embedding = embedding / count
        #     return embedding

        # def get_entity_embedding_use_e_token(token_embedding, masked_entities, code):
        #     embedding = None
        #     for i, mask in enumerate(masked_entities):
        #         if mask == code:
        #             embedding = token_embedding[i]

        #     return embedding

        batch_embedding = []

        if not is_full_sample:
            for i in range(batch_size):
                masked_entities = masked_entities_list[i]
                chemical_code = chemical_code_list[i]
                disease_code = disease_code_list[i]
                token_embedding = sequence_output[i]
                chemical_embedding = get_entity_embedding(
                    token_embedding, masked_entities, chemical_code)
                disease_embedding = get_entity_embedding(
                    token_embedding, masked_entities, disease_code)
                # print('chemical_embedding shape: ', chemical_embedding.shape)
                # print('disease_embedding shape: ', disease_embedding.shape)
                entity_embedding = torch.cat(
                    (chemical_embedding, disease_embedding), 0)
                # print(entity_embedding.shape)
                batch_embedding.append(entity_embedding.tolist())
            batch_embedding = torch.tensor(batch_embedding).cuda()
            sequence_output_cls = batch_embedding
            x = self.dropout(sequence_output_cls)
            x = self.dense(x)
            x = get_activation("gelu")(
                x
            )  # although BERT uses tanh here, it seems Electra authors used gelu here
            x = self.dropout(x)
            x = self.out_proj(x)
            return x
        else:
            batch_embedding = []
            for i in range(batch_size):
                masked_entities = masked_entities_list[i]
                chemical_codes, disease_codes = generate_code_pairs_list(
                    chemical_code_list[i], disease_code_list[i], label_length)
                token_embedding = sequence_output[i]
                current_output = []
                for j in range(len(chemical_codes)):
                    chemical_embeddings = get_all_entity_embedding(
                        token_embedding, masked_entities, chemical_codes[j])
                    disease_embeddings = get_all_entity_embedding(
                        token_embedding, masked_entities, disease_codes[j])
                    chemical_embedding = torch.mean(chemical_embeddings, dim=0)
                    disease_embedding = torch.mean(disease_embeddings, dim=0)
                    r_rep = torch.cat([chemical_embedding, disease_embedding],
                                      0)
                    current_output.append(r_rep)
                current_output_stacked = torch.stack(current_output).unsqueeze(
                    0)
                batch_embedding.append(current_output_stacked)
            batch_embedding = torch.cat(batch_embedding, 0)
            sequence_output_cls = batch_embedding
            x = self.dropout(sequence_output_cls)
            x = self.dense(x)
            x = get_activation("gelu")(
                x
            )  # although BERT uses tanh here, it seems Electra authors used gelu here
            x = self.dropout(x)
            x = self.out_proj(x)
            return x
Example #15
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        used_entity_token=False,
    ):
        r"""
    Return:
        :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.ElectraConfig`) and inputs:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
            of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.

    Examples::

        from transformers import ElectraModel, ElectraTokenizer
        import torch

        tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
        model = ElectraModel.from_pretrained('google/electra-small-discriminator')

        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids)

        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

        """

        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape,
                                         dtype=torch.long,
                                         device=device)

        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask, input_shape, device)
        head_mask = self.get_head_mask(head_mask,
                                       self.config.num_hidden_layers)

        hidden_states = self.embeddings(input_ids=input_ids,
                                        position_ids=position_ids,
                                        token_type_ids=token_type_ids,
                                        inputs_embeds=inputs_embeds)

        if hasattr(self, "embeddings_project"):
            hidden_states = self.embeddings_project(hidden_states)

        hidden_states = self.encoder(
            hidden_states,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            # output_attentions=output_attentions,
        )
        sequence_output = hidden_states[0]
        sequence_output_cls = sequence_output[:, 0, :]
        x = self.dropout(sequence_output_cls)
        x = self.dense(x)
        x = get_activation("gelu")(
            x
        )  # although BERT uses tanh here, it seems Electra authors used gelu here
        x = self.dropout(x)
        x = self.out_proj(x)
        return x
Example #16
    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                output_attentions=None,
                used_entity_token=True,
                masked_entities_list=None,
                chemical_code_list=None,
                disease_code_list=None,
                other_code_list=None):
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape,
                                         dtype=torch.long,
                                         device=device)

        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask, input_shape, device)
        head_mask = self.get_head_mask(head_mask,
                                       self.config.num_hidden_layers)

        hidden_states = self.embeddings(input_ids=input_ids,
                                        position_ids=position_ids,
                                        token_type_ids=token_type_ids,
                                        inputs_embeds=inputs_embeds)

        if hasattr(self, "embeddings_project"):
            hidden_states = self.embeddings_project(hidden_states)

        hidden_states = self.encoder(
            hidden_states,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            # output_attentions=output_attentions,
        )

        batch_size = chemical_code_list.shape[0]

        token_embedding_output = hidden_states[0]

        def get_entity_embedding(token_embedding, masked_entities, code):
            count = 0
            embedding = None
            check = True
            for i, mask in enumerate(masked_entities):
                if mask == code:
                    if check:
                        count += 1
                        check = False
                    if embedding is None:
                        embedding = token_embedding[i]
                    else:
                        embedding += token_embedding[i]
                    # print('embedding shape: ', embedding.shape)
                else:
                    check = True

            embedding = embedding / count
            return embedding

        batch_embedding = []

        if not used_entity_token:
            for i in range(batch_size):
                masked_entities = masked_entities_list[i]
                chemical_code = chemical_code_list[i]
                disease_code = disease_code_list[i]
                other_code = other_code_list[i]
                token_embedding = token_embedding_output[i]
                if chemical_code == -1:
                    other_embedding = get_entity_embedding(
                        token_embedding, masked_entities, other_code)
                    disease_embedding = get_entity_embedding(
                        token_embedding, masked_entities, disease_code)
                    entity_embedding = torch.cat(
                        (disease_embedding, other_embedding), 0)
                elif disease_code == -1:
                    chemical_embedding = get_entity_embedding(
                        token_embedding, masked_entities, chemical_code)
                    other_embedding = get_entity_embedding(
                        token_embedding, masked_entities, other_code)
                    entity_embedding = torch.cat(
                        (chemical_embedding, other_embedding), 0)
                elif other_code == -1:
                    chemical_embedding = get_entity_embedding(
                        token_embedding, masked_entities, chemical_code)
                    disease_embedding = get_entity_embedding(
                        token_embedding, masked_entities, disease_code)
                    entity_embedding = torch.cat(
                        (chemical_embedding, disease_embedding), 0)

                batch_embedding.append(entity_embedding.tolist())
        batch_embedding = torch.tensor(batch_embedding).cuda()
        sequence_output_cls = batch_embedding
        x = self.dropout(sequence_output_cls)
        x = self.dense(x)
        x = get_activation("tanh")(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x
Example #17
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        used_entity_token=False,
        masked_entities_list=None,
        chemical_code_list=None,
        disease_code_list=None,
    ):
        r"""
    Return:
        :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.ElectraConfig`) and inputs:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
            of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.

    Examples::

        from transformers import ElectraModel, ElectraTokenizer
        import torch

        tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
        model = ElectraModel.from_pretrained('google/electra-small-discriminator')

        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids)

        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

        """

        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape,
                                         dtype=torch.long,
                                         device=device)

        extended_attention_mask = self.get_extended_attention_mask(
            attention_mask, input_shape, device)
        head_mask = self.get_head_mask(head_mask,
                                       self.config.num_hidden_layers)

        hidden_states = self.embeddings(input_ids=input_ids,
                                        position_ids=position_ids,
                                        token_type_ids=token_type_ids,
                                        inputs_embeds=inputs_embeds)

        if hasattr(self, "embeddings_project"):
            hidden_states = self.embeddings_project(hidden_states)

        hidden_states = self.encoder(
            hidden_states,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            # output_attentions=output_attentions,
        )

        batch_size = chemical_code_list.shape[0]
        token_embedding_output = hidden_states[0]

        def get_entity_embedding(token_embedding, masked_entities, code):
            count = 0
            embedding = torch.zeros(token_embedding.shape[1]).cuda()
            check = True
            for i, mask in enumerate(masked_entities):
                if mask == code:
                    if check:
                        count += 1
                        check = False
                    embedding += token_embedding[i]
                else:
                    check = True

            embedding = embedding / count
            return embedding

        # def get_entity_token_embedding(token_embedding, masked_entities, code):
        #     count = 0
        #     embedding = torch.zeros(token_embedding.shape[1]).cuda()
        #     check = True
        #     for i, mask in enumerate(masked_entities):
        #         if mask == code and check:
        #             count += 1
        #             embedding += token_embedding[i]
        #         else:
        #             if
        #     # embedding = embedding / count
        #     return embedding

        batch_embedding = []

        if not used_entity_token:
            for i in range(batch_size):
                masked_entities = masked_entities_list[i]
                chemical_code = chemical_code_list[i]
                disease_code = disease_code_list[i]
                token_embedding = token_embedding_output[i]
                chemical_embedding = get_entity_embedding(
                    token_embedding, masked_entities, chemical_code)
                disease_embedding = get_entity_embedding(
                    token_embedding, masked_entities, disease_code)
                # print('chemical_embedding shape: ', chemical_embedding.shape)
                # print('disease_embedding shape: ', disease_embedding.shape)
                entity_embedding = torch.cat(
                    (chemical_embedding, disease_embedding), 0)
                # print(entity_embedding.shape)
                batch_embedding.append(entity_embedding.tolist())
        # else:
        #     for i in range(batch_size):
        #         masked_entities = masked_entities_list[i]
        #         chemical_code = chemical_code_list[i]
        #         disease_code = disease_code_list[i]
        #         token_embedding = token_embedding_output[i]
        #         chemical_embedding = get_entity_embedding(token_embedding, masked_entities, chemical_code)
        #         disease_embedding = get_entity_embedding(token_embedding, masked_entities, disease_code)
        #         # print('chemical_embedding shape: ', chemical_embedding.shape)
        #         # print('disease_embedding shape: ', disease_embedding.shape)
        #         entity_embedding = torch.cat((chemical_embedding, disease_embedding), 0)
        #         # print(entity_embedding.shape)
        #         batch_embedding.append(entity_embedding.tolist())
        batch_embedding = torch.tensor(batch_embedding).cuda()
        # print('batch_embedding shape: ', batch_embedding.shape)
        sequence_output_cls = batch_embedding
        x = self.dropout(sequence_output_cls)
        x = self.dense(x)
        x = get_activation("gelu")(
            x
        )  # although BERT uses tanh here, it seems Electra authors used gelu here
        x = self.dropout(x)
        x = self.out_proj(x)
        return x
Example #18
 def test_get_activation(self):
     get_activation("swish")
     get_activation("relu")
     get_activation("tanh")
     get_activation("gelu_new")
     get_activation("gelu_fast")
     with self.assertRaises(KeyError):
         get_activation("bogus")
     with self.assertRaises(KeyError):
         get_activation(None)
Example #19
    def __init__(self, config):
        super().__init__()

        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = get_activation(config.hidden_act)
        self.dense_prediction = nn.Linear(config.hidden_size, 1)