Example #1
    def __init__(self, hidden_size: int):
        super(BahdanauAttention, self).__init__()
        self.hidden_size = hidden_size
        self.attention = nn.Linear(hidden_size * 2,
                                   hidden_size).to(get_device())
        # Note: .to() on the freshly created nn.Parameter can return a plain
        # tensor (e.g. on a GPU), so ``v`` may not be registered as a module
        # parameter; this is why it is saved and loaded separately in
        # save_model / load_model below.
        self.v = nn.Parameter(torch.rand(hidden_size)).to(get_device())
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.uniform_(-stdv, stdv)
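
The forward pass of this attention module is not included in this listing. The sketch below shows how additive (Bahdanau) attention is commonly computed on top of exactly these layers; the class name BahdanauAttentionSketch, the argument names and the tensor shapes are assumptions, not the project's actual code.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class BahdanauAttentionSketch(nn.Module):
    """Additive attention: score(h, e) = v^T * tanh(W [h; e])."""

    def __init__(self, hidden_size: int):
        super().__init__()
        self.attention = nn.Linear(hidden_size * 2, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.uniform_(-stdv, stdv)

    def forward(self, hidden, encoder_outputs):
        # hidden: [batch, hidden_size] - current decoder hidden state
        # encoder_outputs: [seq_len, batch, hidden_size]
        seq_len = encoder_outputs.size(0)
        h = hidden.repeat(seq_len, 1, 1).transpose(0, 1)              # [batch, seq_len, hidden]
        enc = encoder_outputs.transpose(0, 1)                         # [batch, seq_len, hidden]
        energy = torch.tanh(self.attention(torch.cat([h, enc], 2)))   # [batch, seq_len, hidden]
        energy = energy.transpose(1, 2)                               # [batch, hidden, seq_len]
        v = self.v.repeat(enc.size(0), 1).unsqueeze(1)                # [batch, 1, hidden]
        scores = torch.bmm(v, energy).squeeze(1)                      # [batch, seq_len]
        return F.softmax(scores, dim=1).unsqueeze(1)                  # [batch, 1, seq_len]
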
Example #2
    def get_text_summary_from_batch(
            self, batch) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Obtains the original text and target summary index tensors from a batch and moves them to the GPU.

        :param batch: Batch of text and summary examples
        :type batch: torchtext.data.batch.Batch
        :return: Text and summary index tensors for the model
        :rtype: tuple
        """
        text = batch.text[0].to(get_device())
        summary = batch.summary[0].to(get_device())
        return text, summary
Example #3
    def predict(
            self,
            text: str,
            length_of_original_text: float = 0.25) -> Tuple[str, torch.Tensor]:
        """
        Predicts model output / summarizes given text. Obtains summarization with defined maximum percentage of length
        of original text. Returns summarization and attention weights to plot attention heatmap.

        :param text: Original text to summarize
        :type text: str
        :param length_of_original_text: Maximum ratio of summary length comparing to original text
        :type length_of_original_text: float
        :return: summary text and attention weights
        :rtype: tuple
        """
        with torch.no_grad():
            sequence = self.vocab_config.indices_from_text(text).unsqueeze(0)
            sequence_length = sequence.size(1)
            encoder_outputs, encoder_hidden = self.encoder(
                sequence.transpose(0, 1))

            decoder_input = torch.LongTensor([
                self.vocab_config.indices_from_text(
                    Token.StartOfSequence.value)
            ]).to(get_device())
            hidden = encoder_hidden[:self.decoder.n_layers]
            summary_words = [Token.StartOfSequence.value]
            max_summary_length = int(sequence_length * length_of_original_text)
            decoder_attentions = torch.zeros(max_summary_length,
                                             sequence_length)

            for idx in range(max_summary_length):
                output, hidden, decoder_attention = self.decoder(
                    decoder_input,
                    hidden,
                    encoder_outputs,
                )
                decoder_attentions[idx, :decoder_attention.size(2)] += \
                    decoder_attention.squeeze(0).squeeze(0).cpu().data
                top_v, top_i = output.data.topk(1)
                ni = top_i[0]
                if ni == self.vocab_config.indices_from_text(
                        Token.EndOfSequence.value):
                    break
                else:
                    summary_words.append(
                        self.vocab_config.text_from_indices(ni))

                decoder_input = torch.LongTensor([ni]).to(get_device())
            summary_words.append(Token.EndOfSequence.value)
            summary = " ".join(summary_words).lstrip()
            return summary, decoder_attentions
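
A hypothetical usage of predict; the object name summarizer and the input string are placeholders, and the heatmap is drawn with matplotlib, which is an assumption rather than a dependency shown in this listing.

import matplotlib.pyplot as plt

# `summarizer` stands for an instance of the class this method belongs to,
# with a trained model already loaded.
summary, attentions = summarizer.predict(
    "text of the original article to be summarized ...",
    length_of_original_text=0.25)
print(summary)

# Visualize the returned attention weights as a heatmap.
plt.imshow(attentions.numpy(), aspect='auto', cmap='viridis')
plt.xlabel('source token position')
plt.ylabel('summary token position')
plt.show()
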
Example #4
    def __init__(self,
                 input_size: int,
                 embedding_size: int,
                 hidden_size: int,
                 n_layers: int = 1,
                 dropout: float = 0.1):
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.embedding = nn.Embedding(input_size,
                                      embedding_size,
                                      padding_idx=1).to(get_device())
        self.gru = nn.GRU(embedding_size,
                          hidden_size,
                          n_layers,
                          dropout=dropout,
                          bidirectional=True).to(get_device())
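
The encoder's forward pass is not part of this listing. Below is a standalone sketch of the forward pass such a bidirectional GRU encoder typically pairs with; the class name, argument names and the summing of the two GRU directions are assumptions (the sum is suggested by the hidden_size-sized encoder outputs the attention and decoder expect), and the project's get_device() placement is omitted.

import torch
import torch.nn as nn


class EncoderRNNSketch(nn.Module):

    def __init__(self, input_size: int, embedding_size: int, hidden_size: int,
                 n_layers: int = 1, dropout: float = 0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embedding_size, padding_idx=1)
        self.gru = nn.GRU(embedding_size, hidden_size, n_layers,
                          dropout=dropout, bidirectional=True)

    def forward(self, input_seqs, hidden=None):
        # input_seqs: [seq_len, batch] of token indices
        embedded = self.embedding(input_seqs)          # [seq_len, batch, embedding_size]
        outputs, hidden = self.gru(embedded, hidden)   # [seq_len, batch, 2 * hidden_size]
        # Sum the forward and backward directions so downstream modules
        # receive tensors of size hidden_size instead of 2 * hidden_size.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        return outputs, hidden
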
Example #5
    def indices_from_text(self, text: str) -> torch.Tensor:
        """
        Converts text to a tensor of the corresponding vocabulary indices; unknown words map to the index of Token.Unknown.

        :param text: Text to convert
        :type text: str
        :return: Tensor with indices
        :rtype: torch.Tensor
        """
        indices = [
            self.stoi.get(word, self.stoi.get(Token.Unknown.value))
            for word in text.strip().split(' ')
        ]
        return torch.LongTensor(indices).to(get_device())
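
A small hypothetical usage showing the out-of-vocabulary fallback; the object name vocab_config and the input string are placeholders.

# Words missing from self.stoi fall back to the index of Token.Unknown.
indices = vocab_config.indices_from_text('the quick zzyzx fox')
# -> LongTensor with 4 indices, the unknown word mapped to the Token.Unknown index
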
Example #6
    def create_optimizers_and_loss(self) -> None:
        """
        Initializes the Adam optimizer for the Seq2Seq model and the learning rate scheduler as specified in the config file.
        Initializes CrossEntropyLoss, ignoring the padding token <pad> in the sequence.
        """
        self.optimizer = optim.Adam(self.seq2seq.parameters(),
                                    lr=self.config['learning_rate'])
        self.scheduler = optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=self.config['scheduler_step_size'],
            gamma=self.config['scheduler_gamma'],
        )
        self.criterion = nn.CrossEntropyLoss(
            ignore_index=self.vocab_config.stoi[Token.Padding.value]).to(
                get_device())
Example #7
    def __init__(
        self,
        embedding_size: int,
        hidden_size: int,
        output_size: int,
        n_layers: int = 1,
        dropout: float = 0.1,
    ):
        super(DecoderRNN, self).__init__()
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = nn.Embedding(output_size,
                                      embedding_size,
                                      padding_idx=1).to(get_device())
        self.dropout = nn.Dropout(dropout, inplace=True).to(get_device())
        self.attention = BahdanauAttention(hidden_size).to(get_device())
        self.gru = nn.GRU(hidden_size + embedding_size, hidden_size,
                          n_layers).to(get_device())
        self.classifier = nn.Linear(hidden_size * 2,
                                    output_size).to(get_device())
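
As with the encoder, the decoder's forward pass is not included here. The fragment below sketches a forward method consistent with the layer sizes defined above (GRU input of hidden_size + embedding_size, classifier input of hidden_size * 2) and with how the decoder is called in Examples #3 and #9; the argument names, shapes and the log-softmax output are assumptions, and torch as well as torch.nn.functional as F are assumed to be imported.

    # Hypothetical continuation of the DecoderRNN class above.
    def forward(self, word_input, last_hidden, encoder_outputs):
        # word_input: [batch] of previous summary token indices
        # last_hidden: [n_layers, batch, hidden_size]
        # encoder_outputs: [seq_len, batch, hidden_size]
        embedded = self.dropout(self.embedding(word_input))                   # [batch, embedding_size]
        embedded = embedded.unsqueeze(0)                                      # [1, batch, embedding_size]
        attention_weights = self.attention(last_hidden[-1], encoder_outputs)  # [batch, 1, seq_len]
        context = attention_weights.bmm(encoder_outputs.transpose(0, 1))      # [batch, 1, hidden_size]
        context = context.transpose(0, 1)                                     # [1, batch, hidden_size]
        rnn_input = torch.cat([embedded, context], dim=2)  # hidden_size + embedding_size
        output, hidden = self.gru(rnn_input, last_hidden)
        output = output.squeeze(0)                                            # [batch, hidden_size]
        context = context.squeeze(0)                                          # [batch, hidden_size]
        # Classifier input is hidden_size * 2: GRU output concatenated with context.
        output = F.log_softmax(self.classifier(torch.cat([output, context], dim=1)), dim=1)
        return output, hidden, attention_weights
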
Example #8
    def save_model(self, model_path: str, model_epoch: int) -> None:
        """
        Saves the trained model weights after an epoch, transferred to CPU.
        Currently the attention ``v`` parameter is also saved separately, because PyTorch does not support saving this
        nn.Parameter directly.

        :param model_path: Path to save model
        :type model_path: str
        :param model_epoch: Model epoch
        :type model_epoch: int
        """
        torch.save(self.seq2seq.cpu().state_dict(),
                   model_path + f'_{model_epoch}.pt')
        torch.save(self.seq2seq.decoder.attention.v.cpu(),
                   model_path + f'_att_param_{model_epoch}.pt')
        self.logger.info(f'Saved model {model_path}_{model_epoch}.pt')
        self.seq2seq.to(get_device())
Example #9
    def forward(self,
                text: torch.Tensor,
                summary: torch.Tensor,
                teacher_forcing_ratio: float = 0.5) -> torch.Tensor:
        """
        Defines Seq2Seq structure and flow.
        Teacher forcing ratio specifies probability of altering the decoder output with the target summary token
        for the next word generation. Used to accelerate model learning time.

        * Feeds encoder with input indices
        * Initializes decoder hidden state as encoder hidden state
        * Initializes decoder output with Start of Sequence <sos> token
        * Initializes summary output vector
        * Until the maximum summary length is reached:
            * Feeds decoder with decoder output, hidden state and encoder output
            * Updates decoder output and hidden state
            * Updates summary output vector with decoder output token
            * With teacher_forcing_ratio probability alters decoder output

        :param text: Indices of input text
        :type text: torch.Tensor
        :param summary: Indices of target / reference summary
        :type summary: torch.Tensor
        :param teacher_forcing_ratio:
        :type teacher_forcing_ratio: float
        :return: Output sequence / summary
        :rtype: torch.Tensor
        """
        batch_size = text.size(1)
        max_len = summary.size(0)
        vocab_size = self.decoder.output_size

        encoder_output, hidden = self.encoder(text)
        hidden = hidden[:self.decoder.n_layers]
        output = summary.data[0, :]

        outputs = torch.FloatTensor(max_len, batch_size,
                                    vocab_size).fill_(0).to(get_device())
        for t in range(1, max_len):
            output, hidden, attention_weights = self.decoder(
                output, hidden, encoder_output)
            outputs[t] = output
            is_teacher = random.random() < teacher_forcing_ratio
            top_first = output.data.max(1)[1]
            output = summary.data[t] if is_teacher else top_first
        return outputs
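
A hypothetical training step showing how this forward output is typically consumed together with the loss defined in create_optimizers_and_loss (Example #6); the object name trainer and the batch variables are placeholders.

# text: [src_len, batch] and summary: [summary_len, batch] index tensors,
# e.g. obtained via get_text_summary_from_batch (Example #2).
trainer.optimizer.zero_grad()
outputs = trainer.seq2seq(text, summary, teacher_forcing_ratio=0.5)

# Position 0 of `outputs` is never filled (the loop starts at t=1), so skip it
# and flatten to [tokens, vocab_size] for CrossEntropyLoss.
vocab_size = trainer.decoder.output_size
loss = trainer.criterion(outputs[1:].view(-1, vocab_size),
                         summary[1:].contiguous().view(-1))
loss.backward()
trainer.optimizer.step()
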
Example #10
    def create_model(self) -> None:
        """
        Initializes full Seq2Seq model with encoder and decoder as specified in config file.
        """
        self.encoder = EncoderRNN(
            input_size=self.config['text_size'],
            embedding_size=self.config['embed_size'],
            hidden_size=self.config['hidden_size'],
            n_layers=2,
            dropout=0.5,
        )
        self.decoder = DecoderRNN(
            embedding_size=self.config['embed_size'],
            hidden_size=self.config['hidden_size'],
            output_size=self.config['text_size'],
            n_layers=1,
            dropout=0.5,
        )
        self.seq2seq = Seq2Seq(encoder=self.encoder,
                               decoder=self.decoder).to(get_device())
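
The config keys used here and in Example #6 come from the project's configuration file. A purely illustrative example of what such a config might contain (the values are assumptions, only the key names appear in the code above):

config = {
    'text_size': 50000,        # vocabulary size (encoder input / decoder output)
    'embed_size': 128,
    'hidden_size': 256,
    'learning_rate': 1e-3,
    'scheduler_step_size': 5,
    'scheduler_gamma': 0.1,
}
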
Example #11
    def load_model(self,
                   model_path: str,
                   attention_param_path: str = None) -> None:
        """
        Loads a trained model and transfers it to the GPU.
        Currently the attention ``v`` parameter is also saved and loaded separately, because PyTorch does not
        support saving this nn.Parameter directly.

        :param model_path: Path to trained model
        :type model_path: str
        :param attention_param_path: Path to trained attention parameter
        :type attention_param_path: str
        """
        if attention_param_path:
            self.seq2seq.load_state_dict(torch.load(model_path), strict=False)
            self.seq2seq.decoder.attention.v = nn.Parameter(
                torch.load(attention_param_path))
        else:
            self.seq2seq.load_state_dict(torch.load(model_path))
        self.seq2seq = self.seq2seq.to(get_device())
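
A hypothetical end-to-end inference flow combining create_model, load_model and predict, assuming these methods live on the same object (here called trainer) and that the checkpoint names follow the pattern used by save_model (Example #8).

trainer.create_model()
trainer.load_model('checkpoints/summarizer_10.pt',
                   attention_param_path='checkpoints/summarizer_att_param_10.pt')
summary, attentions = trainer.predict("original article text ...")
print(summary)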