Example #1
    def __init__(self, num_classes: int, embedding_dim: int, k_max: int, vocab: Vocab) -> None:
        """Instantiating VDCNN class

        Args:
            num_classes (int): the number of classes
            embedding_dim (int): the dimension of embedding vector for token
            k_max (int): the parameter of k-max pooling following the last convolution block
            vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab
        """
        super(VDCNN, self).__init__()
        self._extractor = nn.Sequential(nn.Embedding(len(vocab), embedding_dim, vocab.to_indices(vocab.padding_token)),
                                        Permute(),
                                        nn.Conv1d(embedding_dim, 64, 3, 1, 1),
                                        ConvBlock(64, 64),
                                        ConvBlock(64, 64),
                                        nn.MaxPool1d(2, 2),
                                        ConvBlock(64, 128),
                                        ConvBlock(128, 128),
                                        nn.MaxPool1d(2, 2),
                                        ConvBlock(128, 256),
                                        ConvBlock(256, 256),
                                        nn.MaxPool1d(2, 2),
                                        ConvBlock(256, 512),
                                        ConvBlock(512, 512),
                                        nn.AdaptiveMaxPool1d(k_max),
                                        Flatten())

        self._classifier = nn.Sequential(nn.Linear(512 * k_max, 2048),
                                         nn.ReLU(),
                                         nn.Linear(2048, 2048),
                                         nn.ReLU(),
                                         nn.Linear(2048, num_classes))
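The VDCNN snippet relies on three helper modules that are not shown: Permute, Flatten, and ConvBlock. Below is a minimal sketch of plausible definitions, assuming ConvBlock follows the Conv1d-BatchNorm-ReLU (twice) pattern from the VDCNN paper (Conneau et al., 2017); the exact composition is an assumption, not taken from the snippet:

    import torch
    import torch.nn as nn

    class Permute(nn.Module):
        """Swap (batch, seq_len, emb_dim) to (batch, emb_dim, seq_len) for Conv1d."""
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x.permute(0, 2, 1)

    class Flatten(nn.Module):
        """Collapse every dimension after the batch dimension."""
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x.view(x.size(0), -1)

    class ConvBlock(nn.Module):
        """Two Conv1d-BatchNorm-ReLU stages with kernel size 3 and padding 1."""
        def __init__(self, in_channels: int, out_channels: int) -> None:
            super().__init__()
            self._ops = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 3, 1, 1),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(),
                nn.Conv1d(out_channels, out_channels, 3, 1, 1),
                nn.BatchNorm1d(out_channels),
                nn.ReLU())

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self._ops(x)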
Example #2
    def __init__(self, num_classes: int, embedding_dim: int,
                 vocab: Vocab) -> None:
        """Instantiating CharCNN class

        Args:
            num_classes (int): the number of classes
            embedding_dim (int): the dimension of embedding vector for token
            vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab
        """
        super(CharCNN, self).__init__()
        self._extractor = nn.Sequential(
            nn.Embedding(len(vocab), embedding_dim,
                         vocab.to_indices(vocab.padding_token)),
            Permute(),
            nn.Conv1d(in_channels=embedding_dim, out_channels=256, kernel_size=7),
            nn.ReLU(),
            nn.MaxPool1d(3, 3),
            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=7),
            nn.ReLU(),
            nn.MaxPool1d(3, 3),
            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(3, 3),
            Flatten())

        self._classifier = nn.Sequential(
            nn.Linear(in_features=1792, out_features=512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=512, out_features=512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=512, out_features=num_classes))

        self.apply(self._init_weights)
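Two details are implicit here: in_features=1792 equals 256 channels times 7 remaining time steps, so the extractor assumes a fixed input length, and self.apply(self._init_weights) requires an initializer method that the snippet omits. A hedged sketch of such a method follows; the Kaiming scheme is an assumption, the snippet only shows that some initializer is applied:

    def _init_weights(self, module: nn.Module) -> None:
        # Hypothetical initializer; the weighting scheme is assumed, not confirmed.
        if isinstance(module, (nn.Conv1d, nn.Linear)):
            nn.init.kaiming_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)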
Example #3
    def __init__(self, label_vocab: Vocab, token_vocab: Vocab, lstm_hidden_dim: int) -> None:
        """Instantiating BilstmCRF class

        Args:
            label_vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab that has label information
            token_vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab that has token information
            lstm_hidden_dim (int): the dimension of the lstm hidden states
        """
        super(BilstmCRF, self).__init__()
        self._embedding = Embedding(token_vocab, padding_idx=token_vocab.to_indices(token_vocab.padding_token),
                                    freeze=False, permuting=False, tracking=True)
        self._pipe = Linker(permuting=False)
        self._bilstm = BiLSTM(self._embedding._ops.embedding_dim, lstm_hidden_dim, using_sequence=True)
        self._fc = nn.Linear(2 * lstm_hidden_dim, len(label_vocab))
        self._crf = CRF(len(label_vocab), bos_tag_id=label_vocab.to_indices(label_vocab.bos_token),
                        eos_tag_id=label_vocab.to_indices(label_vocab.eos_token),
                        pad_tag_id=label_vocab.to_indices(label_vocab.padding_token))
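Embedding, Linker, BiLSTM, and CRF are custom wrappers here, not stock torch.nn modules. Given tracking=True on the embedding, Linker presumably packs the (padded batch, lengths) pair so the LSTM can skip padding; a sketch under that assumption (the behaviour is inferred, not confirmed by the snippet):

    import torch.nn as nn
    from torch.nn.utils.rnn import pack_padded_sequence

    class Linker(nn.Module):
        """Hypothetical glue module: turns the (padded_tensor, lengths) pair
        produced by a tracking Embedding into a PackedSequence for the LSTM."""
        def __init__(self, permuting: bool = False) -> None:
            super().__init__()
            self._permuting = permuting

        def forward(self, x):
            padded, lengths = x
            if self._permuting:
                padded = padded.permute(0, 2, 1)  # back to (batch, seq_len, features)
            return pack_padded_sequence(padded, lengths.cpu(), batch_first=True,
                                        enforce_sorted=False)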
Example #4
    def __init__(self, vocab: Vocab, word_dropout_ratio: float = .2) -> None:
        """Instantiating MultiChannelEmbedding class

        Args:
            vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab
            word_dropout_ratio (float): ratio of replacing token with "<unk>" in the sequence
        """
        super(MultiChannelEmbedding, self).__init__()
        self._static = nn.Embedding.from_pretrained(
            torch.from_numpy(vocab.embedding.idx_to_vec.asnumpy()),
            freeze=True,
            padding_idx=vocab.to_indices(vocab.padding_token))
        self._non_static = nn.Embedding.from_pretrained(
            torch.from_numpy(vocab.embedding.idx_to_vec.asnumpy()),
            freeze=False,
            padding_idx=vocab.to_indices(vocab.padding_token))
        self._word_dropout_ratio = word_dropout_ratio
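The stored word_dropout_ratio suggests that the forward pass replaces token indices with the "<unk>" index at training time before looking up both embedding channels. A self-contained demonstration of that replacement step (the function name and exact behaviour are assumptions):

    import torch

    def word_dropout(tokens: torch.Tensor, unk_idx: int, p: float = 0.2) -> torch.Tensor:
        # Replace each index with unk_idx with probability p.
        drop = torch.bernoulli(torch.full(tokens.shape, p)).bool()
        return tokens.masked_fill(drop, unk_idx)

    tokens = torch.tensor([[5, 8, 2, 9]])
    print(word_dropout(tokens, unk_idx=1))  # roughly one in five indices becomes 1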
Example #5
    def __init__(self, lstm_hidden_dim: int, da: int, r: int, vocab: Vocab) -> None:
        """Instantiating SentenceEncoder class

        Args:
            lstm_hidden_dim (int): the number of features in the hidden states of the bi-directional lstm
            da (int): the number of features in hidden layer from self-attention
            r (int): the number of aspects of self-attention
            vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab
        """
        super(SentenceEncoder, self).__init__()
        self._embedding = Embedding(vocab, padding_idx=vocab.to_indices(vocab.padding_token), freeze=False,
                                    permuting=False, tracking=True)
        self._pipe = Linker(permuting=False)
        self._bilstm = BiLSTM(self._embedding._ops.embedding_dim, lstm_hidden_dim, using_sequence=True)
        self._attention = SelfAttention(2 * lstm_hidden_dim, da, r)
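SelfAttention(2 * lstm_hidden_dim, da, r) matches the structured self-attention of Lin et al. (2017), where da sizes the attention MLP and r is the number of attention hops. A minimal sketch of such a module (parameter and attribute names are assumed):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class SelfAttention(nn.Module):
        """A = softmax(ws2(tanh(ws1(H)))), sentence matrix M = A^T H."""
        def __init__(self, input_dim: int, da: int, r: int) -> None:
            super().__init__()
            self._ws1 = nn.Linear(input_dim, da, bias=False)
            self._ws2 = nn.Linear(da, r, bias=False)

        def forward(self, h: torch.Tensor) -> torch.Tensor:
            # h: (batch, seq_len, input_dim) from the bi-directional LSTM
            attn = F.softmax(self._ws2(torch.tanh(self._ws1(h))), dim=1)  # (batch, seq_len, r)
            return torch.bmm(attn.transpose(1, 2), h)  # (batch, r, input_dim)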
Example #6
    def __init__(self, num_classes: int, embedding_dim: int, hidden_dim: int,
                 vocab: Vocab) -> None:
        """Instantiating ConvRec class

        Args:
            num_classes (int): the number of classes
            embedding_dim (int): the dimension of embedding vector for token
            hidden_dim (int): the dimension of the hidden states in the convolution and lstm layers
            vocab (gluonnlp.Vocab): the instance of gluonnlp.Vocab
        """
        super(ConvRec, self).__init__()
        self._ops = nn.Sequential(
            Embedding(len(vocab), embedding_dim,
                      vocab.to_indices(vocab.padding_token),
                      permuting=True, tracking=True),
            Conv1d(embedding_dim, hidden_dim, 5, 1, 1, F.relu, tracking=True),
            MaxPool1d(2, 2, tracking=True),
            Conv1d(hidden_dim, hidden_dim, 3, 1, 1, F.relu, tracking=True),
            MaxPool1d(2, 2, tracking=True),
            Linker(permuting=True),
            BiLSTM(hidden_dim, hidden_dim, using_sequence=False),
            nn.Dropout(),
            nn.Linear(in_features=2 * hidden_dim, out_features=num_classes))

        self.apply(self._init_weights)
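The tracking=True flags indicate that these Conv1d/MaxPool1d wrappers carry each sequence's true length alongside the tensor, so Linker can still pack variable-length batches after striding has shrunk them. A sketch of the pooling wrapper under that assumption (the length update mirrors the standard output-size formula for an unpadded pool):

    import torch.nn as nn

    class MaxPool1d(nn.MaxPool1d):
        """Hypothetical length-tracking max pool over (tensor, lengths) pairs."""
        def __init__(self, kernel_size: int, stride: int, tracking: bool = False) -> None:
            super().__init__(kernel_size, stride)
            self._tracking = tracking

        def forward(self, x):
            if self._tracking:
                tensor, lengths = x
                # floor((L - kernel_size) / stride) + 1, applied per sequence
                lengths = (lengths - self.kernel_size) // self.stride + 1
                return super().forward(tensor), lengths
            return super().forward(x)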