Example #1
            def forward(
                self,
                texts: Optional[List[str]] = None,
                multi_texts: Optional[List[List[str]]] = None,
                tokens: Optional[List[List[str]]] = None,
                languages: Optional[List[str]] = None,
            ):
                # TorchScript currently breaks on two 'is not None' checks
                # in a single condition, so they are nested here.
                if texts is not None:
                    if tokens is not None:
                        raise RuntimeError("Can't set both tokens and texts")
                    if self.tokenizer is not None:
                        tokens = [[
                            t[0] for t in self.tokenizer.tokenize(text)
                        ] for text in texts]

                if tokens is None:
                    raise RuntimeError("tokens is required")

                tokens = truncate_tokens(tokens, self.max_seq_len,
                                         self.vocab.pad_token)
                seq_lens = make_sequence_lengths(tokens)
                word_ids = self.vocab.lookup_indices_2d(tokens)
                word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
                logits = self.model(torch.tensor(word_ids),
                                    torch.tensor(seq_lens))
                return self.output_layer(logits)
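A minimal call sketch for this forward (the scripted_model instance below is an assumption, not part of the example): texts are tokenized internally when a tokenizer is configured, pre-split tokens bypass tokenization, and passing both raises.

    out = scripted_model(texts=["hello world"])        # tokenized via self.tokenizer
    out = scripted_model(tokens=[["hello", "world"]])  # pre-tokenized input
    # scripted_model(texts=[...], tokens=[...]) raises RuntimeError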
Example #2
            def forward(
                self,
                texts: Optional[List[str]] = None,
                multi_texts: Optional[List[List[str]]] = None,
                tokens: Optional[List[List[str]]] = None,
                languages: Optional[List[str]] = None,
                dense_feat: Optional[List[List[float]]] = None,
            ):
                if tokens is None:
                    raise RuntimeError("tokens is required")
                if dense_feat is None:
                    raise RuntimeError("dense_feat is required")

                tokens = truncate_tokens(tokens, self.max_seq_len, self.vocab.pad_token)
                seq_lens = make_sequence_lengths(tokens)
                word_ids = self.vocab.lookup_indices_2d(tokens)
                word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
                token_bytes, _ = make_byte_inputs(
                    tokens, self.max_byte_len, self.byte_offset_for_non_padding
                )
                dense_feat = self.normalizer.normalize(dense_feat)
                logits = self.model(
                    torch.tensor(word_ids),
                    token_bytes,
                    torch.tensor(seq_lens),
                    torch.tensor(dense_feat, dtype=torch.float),
                )
                return self.output_layer(logits)
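A call sketch for this variant (scripted_model and the feature width are illustrative assumptions): dense_feat must supply one float vector per batch example, which the module normalizes before turning into a float tensor.

    out = scripted_model(
        tokens=[["good", "morning"], ["hi"]],
        dense_feat=[[0.3, 1.0], [0.7, 0.0]],  # one feature row per example
    )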
Example #3
            def forward(
                self,
                texts: Optional[List[str]] = None,
                tokens: Optional[List[List[str]]] = None,
                languages: Optional[List[str]] = None,
                dense_feat: Optional[List[List[float]]] = None,
            ):
                if tokens is None:
                    raise RuntimeError("tokens is required")
                if dense_feat is None:
                    raise RuntimeError("dense_feat is required")

                # Trim each row to at most max_seq_len tokens when a limit is set.
                trimmed_tokens: List[List[str]] = []
                if self.max_seq_len >= 0:
                    for row in tokens:
                        trimmed_tokens.append(row[: self.max_seq_len])
                else:
                    trimmed_tokens = tokens

                seq_lens = make_sequence_lengths(trimmed_tokens)
                word_ids = self.vocab.lookup_indices_2d(trimmed_tokens)
                word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
                dense_feat = self.normalizer.normalize(dense_feat)
                logits = self.model(
                    torch.tensor(word_ids),
                    torch.tensor(seq_lens),
                    torch.tensor(dense_feat, dtype=torch.float),
                )
                return self.output_layer(logits)
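The manual trimming loop above reproduces what truncate_tokens does in the other examples for a non-negative max_seq_len. A plausible sketch of that helper, assuming an empty row is replaced by a single pad token (the actual implementation may differ):

    from typing import List

    def truncate_tokens(
        tokens: List[List[str]], max_seq_len: int, pad_token: str
    ) -> List[List[str]]:
        if max_seq_len < 0:
            return tokens
        truncated: List[List[str]] = []
        for row in tokens:
            trimmed = row[: max_seq_len]
            # Assumption: an empty row becomes a single pad token.
            truncated.append(trimmed if trimmed else [pad_token])
        return truncated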
Example #4
 def forward(self, tokens: List[List[str]]):
     seq_lens = make_sequence_lengths(tokens)
     word_ids = self.vocab.lookup_indices_2d(tokens)
     word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
     token_bytes, _ = make_byte_inputs(
         tokens, self.max_byte_len,
         self.byte_offset_for_non_padding)
     logits = self.model(torch.tensor(word_ids), token_bytes,
                         torch.tensor(seq_lens))
     return self.output_layer(logits)
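The small helpers shared by all of these examples are easy to infer from how they are used; plausible sketches (the actual implementations may differ):

    from typing import List

    def make_sequence_lengths(tokens: List[List[str]]) -> List[int]:
        # One length per row of the batch.
        return [len(row) for row in tokens]

    def pad_2d(
        ids: List[List[int]], seq_lens: List[int], pad_idx: int
    ) -> List[List[int]]:
        # Right-pad every row with pad_idx up to the longest row.
        max_len = max(seq_lens) if seq_lens else 0
        return [row + [pad_idx] * (max_len - len(row)) for row in ids]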
Example #5
 def forward(self, tokens: List[List[str]], dense_feat: List[List[float]]):
     seq_lens = make_sequence_lengths(tokens)
     word_ids = self.vocab.lookup_indices_2d(tokens)
     word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
     dense_feat = self.normalizer.normalize(dense_feat)
     logits = self.model(
         torch.tensor(word_ids),
         torch.tensor(seq_lens),
         torch.tensor(dense_feat, dtype=torch.float),
     )
     return self.output_layer(logits)
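The normalizer appears only through self.normalizer.normalize here; one plausible stand-in, assuming per-column standardization over precomputed statistics (an assumption, not the library's actual class):

    from typing import List

    class Normalizer:
        # Hypothetical: scales each column to zero mean and unit variance.
        def __init__(self, means: List[float], stddevs: List[float]):
            self.means = means
            self.stddevs = stddevs

        def normalize(self, rows: List[List[float]]) -> List[List[float]]:
            return [
                [(x - m) / s for x, m, s in zip(row, self.means, self.stddevs)]
                for row in rows
            ]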
Example #6
            def forward(
                self,
                texts: Optional[List[str]] = None,
                tokens: Optional[List[List[str]]] = None,
                languages: Optional[List[str]] = None,
            ):
                if tokens is None:
                    raise RuntimeError("tokens is required")

                seq_lens = make_sequence_lengths(tokens)
                word_ids = self.vocab.lookup_indices_2d(tokens)
                word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
                logits = self.model(torch.tensor(word_ids),
                                    torch.tensor(seq_lens))
                return self.output_layer(logits)
Example #7
            def forward(
                self,
                texts: Optional[List[str]] = None,
                multi_texts: Optional[List[List[str]]] = None,
                tokens: Optional[List[List[str]]] = None,
                languages: Optional[List[str]] = None,
            ):
                if tokens is None:
                    raise RuntimeError("tokens is required")

                tokens = truncate_tokens(tokens, self.max_seq_len, "__PAD__")
                seq_lens = make_sequence_lengths(tokens)
                token_bytes, _ = make_byte_inputs(
                    tokens, self.max_byte_len,
                    self.byte_offset_for_non_padding)
                logits = self.model(token_bytes, torch.tensor(seq_lens))
                return self.output_layer(logits)
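Unlike Example #1, this variant feeds the model byte-level inputs only, so there is no vocabulary lookup (no lookup_indices_2d or pad_2d). Note that the pad token is hard-coded as "__PAD__" here rather than read from self.vocab.pad_token as in the other examples.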
Example #8
            def forward(
                self,
                texts: Optional[List[str]] = None,
                tokens: Optional[List[List[str]]] = None,
                languages: Optional[List[str]] = None,
                dense_feat: Optional[List[List[float]]] = None,
            ):
                if tokens is None:
                    raise RuntimeError("tokens is required")

                seq_lens = make_sequence_lengths(tokens)
                word_ids = self.vocab.lookup_indices_2d(tokens)
                word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
                if dense_feat is None:
                    raise RuntimeError("dense_feat is required")
                dense_feat = self.normalizer.normalize(dense_feat)
                logits = self.model(
                    torch.tensor(word_ids),
                    torch.tensor(seq_lens),
                    torch.tensor(dense_feat, dtype=torch.float),
                )
                return self.output_layer(logits)
Example #9
 def forward(self, tokens: List[List[str]]):
     seq_lens = make_sequence_lengths(tokens)
     word_ids = self.vocab.lookup_indices_2d(tokens)
     word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
     logits = self.model(torch.tensor(word_ids), torch.tensor(seq_lens))
     return self.output_layer(logits)
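These forward signatures are written so the module can be compiled with TorchScript; a minimal export-and-call sketch (MyDocModel is a hypothetical module carrying the forward above):

    import torch

    scripted = torch.jit.script(MyDocModel())
    logits = scripted([["hello", "world"], ["hi"]])
    scripted.save("doc_model.pt")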