def set_annotations(self, docs: Iterable[Doc], outputs: Dict):
    """Set udify's output, which is calculated in self.predict, to docs.

    For each sentence, verifies that the model's tokenization matches the
    doc's tokens, then writes dep/lemma/pos per token and attaches the
    predicted heads.

    Raises:
        ValueError: if the model's word list does not match the doc tokens
            (indicates an internal tokenization mismatch).
    """
    try:
        from udify.models.udify_model import OUTPUTS as UdifyOUTPUTS  # type: ignore
    except ImportError:
        # NOTE(review): assumes Errors.E0 raises; otherwise UdifyOUTPUTS
        # below would be unbound — confirm against the Errors helper.
        Errors.E0(package="unofficial-udify")
    for sent, output in zip(flatten_docs_to_sents(docs), outputs):
        words = output[UdifyOUTPUTS.words]
        _doc_tokens = [token.text for token in sent]
        if words != _doc_tokens:
            # Fixed message: "occured" -> "occurred" and add the missing
            # newline so the first sentence doesn't run into "Input text:".
            raise ValueError(
                "Internal error has occurred.\n"
                f"Input text: {sent.text}\n"
                f"Input tokens: {_doc_tokens}\n"
                f"Model words: {words}"
            )
        for token, dep, upos, lemma in zip(
            sent,
            output[UdifyOUTPUTS.predicted_dependencies],
            output[UdifyOUTPUTS.upos],
            output[UdifyOUTPUTS.lemmas],
        ):
            token.dep_ = dep
            token.lemma_ = lemma
            token.pos_ = upos
        sent = set_heads(sent, output[UdifyOUTPUTS.predicted_heads])
        sent.doc.is_parsed = True
def test_flatten_docs_to_sens(vocab):
    """flatten_docs_to_sents should yield one entry per sentence across all docs."""
    nlp = spacy.blank("en")
    nlp.add_pipe(Sentencizer("."))
    piped = nlp.pipe(["Foo is bar. Bar is baz.", "It is a sentence."])
    sentences = flatten_docs_to_sents(piped)
    # Two sentences from the first text plus one from the second.
    assert len(sentences) == 3
def predict(self, docs: Iterable[Doc]) -> Dict:
    """Run the udify model over every sentence of *docs* and return its raw output.

    Puts the model in eval mode and disables gradient tracking, converts each
    sentence into an AllenNLP instance via the dataset reader, and forwards
    all instances through the model in one call.
    """
    self.model.eval()
    sentences = flatten_docs_to_sents(docs)
    with torch.no_grad():
        instances = [
            self.dataset_reader.text_to_instance([tok.text for tok in sent])
            for sent in sentences
        ]
        return self.model.forward_on_instances(instances)