from typing import Dict, List, Optional, Tuple

import torch

# Sequence helpers are assumed to live in pytext.torchscript.utils.
from pytext.torchscript.utils import (
    make_byte_inputs,
    make_sequence_lengths,
    pad_2d,
    truncate_tokens,
)


def forward(
    self,
    texts: Optional[List[str]] = None,
    multi_texts: Optional[List[List[str]]] = None,
    tokens: Optional[List[List[str]]] = None,
    languages: Optional[List[str]] = None,
    dense_feat: Optional[List[List[float]]] = None,
):
    if tokens is None:
        raise RuntimeError("tokens is required")
    if dense_feat is None:
        raise RuntimeError("dense_feat is required")

    # Truncate every sequence to max_seq_len, then record the resulting
    # per-sequence lengths for the batch.
    tokens = truncate_tokens(tokens, self.max_seq_len, self.vocab.pad_token)
    seq_lens = make_sequence_lengths(tokens)

    # Map tokens to vocab ids and right-pad each row with pad_idx.
    word_ids = self.vocab.lookup_indices_2d(tokens)
    word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)

    # Parallel byte-level inputs for the byte-embedding branch.
    token_bytes, _ = make_byte_inputs(
        tokens, self.max_byte_len, self.byte_offset_for_non_padding
    )

    # Dense features are normalized before being passed to the model.
    dense_feat = self.normalizer.normalize(dense_feat)

    logits = self.model(
        torch.tensor(word_ids),
        token_bytes,
        torch.tensor(seq_lens),
        torch.tensor(dense_feat, dtype=torch.float),
    )
    return self.output_layer(logits)
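
# Hedged sketch of the dense-feature normalization above: the normalizer is
# assumed to standardize each feature column ((x - mean) / stddev), as a
# typical vector normalizer would. `_ExampleNormalizer` is hypothetical and
# not the repo's implementation.
class _ExampleNormalizer:
    def __init__(self, means: List[float], stddevs: List[float]):
        self.means = means
        self.stddevs = stddevs

    def normalize(self, rows: List[List[float]]) -> List[List[float]]:
        # Standardize column-wise, guarding against zero stddev.
        return [
            [
                (v - m) / (s if s != 0 else 1.0)
                for v, m, s in zip(row, self.means, self.stddevs)
            ]
            for row in rows
        ]

# _ExampleNormalizer([1.0], [2.0]).normalize([[3.0]]) -> [[1.0]]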
def forward(
    self,
    texts: Optional[List[str]] = None,
    multi_texts: Optional[List[List[str]]] = None,
    tokens: Optional[List[List[str]]] = None,
    languages: Optional[List[str]] = None,
):
    # TorchScript currently fails to compile two `is not None` checks in a
    # single condition, so the guards are nested instead.
    if texts is not None:
        if tokens is not None:
            raise RuntimeError("Can't set both tokens and texts")
        if self.tokenizer is not None:
            # tokenize() yields (token, start, end) tuples; keep the text.
            tokens = [
                [t[0] for t in self.tokenizer.tokenize(text)]
                for text in texts
            ]
    if tokens is None:
        raise RuntimeError("tokens is required")

    tokens = truncate_tokens(tokens, self.max_seq_len, self.vocab.pad_token)
    seq_lens = make_sequence_lengths(tokens)
    word_ids = self.vocab.lookup_indices_2d(tokens)
    word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)

    logits = self.model(torch.tensor(word_ids), torch.tensor(seq_lens))
    return self.output_layer(logits)
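
# Hedged sketch: the tokenizer above is assumed to return
# (token, start, end) tuples, which is why the comprehension keeps only
# t[0]. This whitespace tokenizer is an illustrative stand-in, not the
# repo's implementation.
def _example_tokenize(text: str) -> List[Tuple[str, int, int]]:
    out: List[Tuple[str, int, int]] = []
    pos = 0
    for tok in text.split():
        # Record character offsets alongside each token.
        start = text.index(tok, pos)
        out.append((tok, start, start + len(tok)))
        pos = start + len(tok)
    return out

# [t[0] for t in _example_tokenize("red shirt")] -> ["red", "shirt"]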
def forward(
    self,
    texts: Optional[List[str]] = None,
    multi_texts: Optional[List[List[str]]] = None,
    tokens: Optional[List[List[str]]] = None,
    languages: Optional[List[str]] = None,
):
    if tokens is None:
        raise RuntimeError("tokens is required")

    tokens = truncate_tokens(tokens, self.max_seq_len, self.vocab.pad_token)
    seq_lens = make_sequence_lengths(tokens)
    word_ids = self.vocab.lookup_indices_2d(tokens)
    word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)

    logits = self.model(torch.tensor(word_ids), torch.tensor(seq_lens))
    return self.output_layer(logits)
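
# Hedged sketch of the id-lookup step above: lookup_indices_2d is assumed
# to map each token row to vocab ids with an unk fallback, after which
# pad_2d right-pads every row with pad_idx. `_example_lookup_indices_2d`
# is a hypothetical stand-in, not the repo's Vocabulary class.
def _example_lookup_indices_2d(
    rows: List[List[str]], stoi: Dict[str, int], unk_idx: int
) -> List[List[int]]:
    return [[stoi.get(tok, unk_idx) for tok in row] for row in rows]

# _example_lookup_indices_2d([["hi", "qzx"]], {"hi": 5}, 0) -> [[5, 0]]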
def forward(
    self,
    texts: Optional[List[str]] = None,
    multi_texts: Optional[List[List[str]]] = None,
    tokens: Optional[List[List[str]]] = None,
    languages: Optional[List[str]] = None,
):
    if tokens is None:
        raise RuntimeError("tokens is required")

    # Byte-only model: pad with a literal "__PAD__" string, since this
    # module has no word vocabulary to supply a pad token.
    tokens = truncate_tokens(tokens, self.max_seq_len, "__PAD__")
    seq_lens = make_sequence_lengths(tokens)
    token_bytes, _ = make_byte_inputs(
        tokens, self.max_byte_len, self.byte_offset_for_non_padding
    )

    logits = self.model(token_bytes, torch.tensor(seq_lens))
    return self.output_layer(logits)
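
# Hedged sketch of the byte featurization used above: make_byte_inputs is
# assumed to encode each token as UTF-8 bytes, truncated to max_byte_len,
# with a fixed offset added to non-padding bytes so that 0 stays reserved
# for padding. `_example_make_byte_inputs` is an illustration, not the
# repo's implementation.
def _example_make_byte_inputs(
    tokens: List[List[str]], max_byte_len: int, offset: int
) -> torch.Tensor:
    batch = len(tokens)
    max_tokens = max(len(row) for row in tokens)
    # Zero-initialized, so unfilled positions remain padding.
    out = torch.zeros(batch, max_tokens, max_byte_len, dtype=torch.long)
    for i, row in enumerate(tokens):
        for j, tok in enumerate(row):
            for k, b in enumerate(tok.encode("utf-8")[:max_byte_len]):
                out[i, j, k] = b + offset
    return out

# _example_make_byte_inputs([["hi"]], 4, 1)[0, 0] -> tensor([105, 106, 0, 0])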