def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
        """Add one majority-vote prediction per token to ``cas``.

        Every ``SENTENCE_TYPE`` annotation is processed on its own: the texts
        of its covered ``TOKEN_TYPE`` annotations go through
        ``self._tokenize_bert`` and ``self._predict``, and
        ``self._align_tokens`` is expected to hand back one group of raw
        predictions per token (presumably one entry per BERT sub-token --
        confirm against ``_align_tokens``).

        Args:
            cas: Document to read from; predictions are added to it in place.
            layer: Forwarded to ``self.create_prediction``.
            feature: Forwarded to ``self.create_prediction``.
            project_id: Not used by this method.
            document_id: Not used by this method.
            user_id: Not used by this method.
        """
        for sentence in cas.select(SENTENCE_TYPE):
            sentence_tokens = list(cas.select_covered(TOKEN_TYPE, sentence))
            words = [tok.get_covered_text() for tok in sentence_tokens]

            bert_groups = self._tokenize_bert(words)
            raw_predictions = self._predict(bert_groups)
            votes_per_token = self._align_tokens(words, bert_groups, raw_predictions)

            # zip stops at the shorter sequence, so surplus tokens or surplus
            # prediction groups are ignored.
            for cas_token, votes in zip(sentence_tokens, votes_per_token):
                start, stop = cas_token.begin, cas_token.end

                # Map each raw prediction to its label and keep the most
                # frequent one for the whole token.
                mapped_votes = [self._label_map[vote] for vote in votes]
                winning_label = Counter(mapped_votes).most_common(1)[0][0]

                cas.add_annotation(
                    self.create_prediction(cas, layer, feature, start, stop, winning_label)
                )
def featurize_cas(fg: FeatureGenerator, cas: Cas) -> List:
    """Build one feature record per (entity mention, candidate) pair in ``cas``.

    Each ``webanno.custom.EntityLinking`` annotation is treated as a query and
    the ``inception.internal.KbHandle`` annotations it covers as its
    candidates. Entities without candidates, or whose ``iri`` matches none of
    their candidates (no gold candidate), are skipped. The query id is the
    entity's position among *all* entity annotations, so ids of skipped
    entities are left unused.

    Args:
        fg: Generator providing ``featurize_candidate`` and ``featurize_query``.
        cas: Document holding the entity, candidate and sentence annotations.

    Returns:
        A list with the object returned by ``fg.featurize_candidate`` for
        every candidate, updated in place with the result of
        ``fg.featurize_query``.

    Raises:
        AssertionError: If an entity is not covered by exactly one sentence.
    """
    features = get_features()

    results = []

    for qid, entity in enumerate(cas.select("webanno.custom.EntityLinking")):
        candidates = list(
            cas.select_covered("inception.internal.KbHandle", entity))

        if not candidates:
            continue

        # Index of the first candidate carrying the entity's own IRI.
        gold_idx = next(
            (i for i, candidate in enumerate(candidates)
             if entity.iri == candidate.iri),
            None)
        if gold_idx is None:
            continue

        sentences = list(
            cas.select_covering(
                "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
                entity))
        assert len(sentences) == 1
        sentence = sentences[0]

        mention = entity.get_covered_text().lower()
        context = sentence.get_covered_text().lower()

        for cid, candidate in enumerate(candidates):
            score = float(entity.iri == candidate.iri)
            query = candidate.query
            # Normalise a missing label *before* lowercasing; calling
            # ``.lower()`` on ``None`` would raise AttributeError.
            label = (candidate.label or "").lower()

            result = fg.featurize_candidate(qid, cid, "inception_rank", score,
                                            mention, context, label,
                                            candidate.description or "",
                                            entity.iri, gold_idx,
                                            candidate.iri, features)

            result.update(fg.featurize_query(mention, query, label))

            results.append(result)

    return results
    def predict(self, cas: Cas, layer: str, feature: str, project_id: str,
                document_id: str, user_id: str):
        """Add one prediction per tagged span found by the user's CRF model.

        The model returned by ``self._load_model(user_id)`` tags every token
        of every ``SENTENCE_TYPE`` annotation. Consecutive tags starting with
        ``"B"`` followed by tags starting with ``"I"`` form a span; each span
        becomes a prediction with the fixed label ``"X"`` covering the first
        to the last token of the span.

        A span is closed by any tag that does not start with ``"I"`` and by
        the end of the sentence, so spans ending on the last token and
        adjacent ``B`` ``B`` spans are both emitted. An ``"I"`` tag with no
        open span is ignored.

        Args:
            cas: Document to read from; predictions are added to it in place.
            layer: Forwarded to ``create_prediction``.
            feature: Forwarded to ``create_prediction``.
            project_id: Not used by this method.
            document_id: Not used by this method.
            user_id: Selects the model to load.

        Returns:
            None. Returns early, adding nothing, when no model is available.
        """
        model: Optional[sklearn_crfsuite.CRF] = self._load_model(user_id)

        if model is None:
            logger.debug("No trained model ready yet!")
            return

        all_tokens = []
        featurized_sentences = []

        for sentence in cas.select(SENTENCE_TYPE):
            tokens = list(cas.select_covered(TOKEN_TYPE, sentence))
            words = [token.get_covered_text() for token in tokens]

            all_tokens.append(tokens)
            featurized_sentences.append(self._sent2features(words))

        all_predictions = model.predict(featurized_sentences)

        assert len(all_predictions) == len(all_tokens)
        for predictions, tokens in zip(all_predictions, all_tokens):
            assert len(predictions) == len(tokens)

            # Offsets of the span currently being built; ``begin is None``
            # means no span is open.
            begin = None
            end = None

            for tag, token in zip(predictions, tokens):
                if tag.startswith("I"):
                    # Extend the open span; a stray "I" without a preceding
                    # "B" does not start one.
                    if begin is not None:
                        end = token.end
                    continue

                # Any non-"I" tag terminates the span that was open so far.
                if begin is not None:
                    prediction = create_prediction(cas, layer, feature,
                                                   begin, end, "X")
                    cas.add_annotation(prediction)
                    begin = None
                    end = None

                if tag.startswith("B"):
                    begin = token.begin
                    end = token.end

            # Flush a span that runs up to the last token of the sentence.
            if begin is not None:
                prediction = create_prediction(cas, layer, feature,
                                               begin, end, "X")
                cas.add_annotation(prediction)
Exemplo n.º 4
0
    def predict(self, cas: Cas, layer: str, feature: str, project_id: str,
                document_id: str, user_id: str):
        """Add relation predictions aligning the first two sentences of ``cas``.

        The ``webanno.custom.Base`` annotations covered by the first sentence
        are the source units, those covered by the second sentence the target
        units. Their texts are passed to ``self._aligner.get_word_aligns``;
        only the index pairs stored under the first key of its result are
        used. Each pair becomes an instance of the ``layer`` type with the
        source unit as ``Governor``, the target unit as ``Dependent`` and the
        target unit's offsets; ``feature`` is set to the empty string.

        Intermediate values are printed to stdout.

        Args:
            cas: Document to read from; predictions are added to it in place.
            layer: Name of the relation type looked up in ``cas.typesystem``.
            feature: Name of the attribute set to ``""`` on every prediction.
            project_id: Not used by this method.
            document_id: Not used by this method.
            user_id: Not used by this method.
        """

        all_sentences = cas.select(SENTENCE_TYPE)

        source_units = cas.select_covered("webanno.custom.Base", all_sentences[0])
        target_units = cas.select_covered("webanno.custom.Base", all_sentences[1])

        source_words = [unit.get_covered_text() for unit in source_units]
        target_words = [unit.get_covered_text() for unit in target_units]

        print(source_words)
        print(target_words)

        word_alignments = self._aligner.get_word_aligns(source_words, target_words)

        relation_type = cas.typesystem.get_type(layer)
        print(list(relation_type.all_features))

        # Only the alignments of the first matching method are turned into
        # predictions; the remaining methods are ignored.
        for method in word_alignments:
            index_pairs = word_alignments[method]
            break
        else:
            index_pairs = ()

        for source_idx, target_idx in index_pairs:
            governor = source_units[source_idx]
            dependent = target_units[target_idx]

            # The relation is anchored on the target unit's span.
            prediction = relation_type(
                Governor=governor,
                Dependent=dependent,
                begin=dependent.begin,
                end=dependent.end,
                inception_internal_predicted=True,
            )
            setattr(prediction, feature, "")
            print(source_idx, target_idx, prediction)

            cas.add_annotation(prediction)
Exemplo n.º 5
0
 def get_mappable_ann(self, cas: Cas, t: Type):
     """Return the first ``FeatureExtractor.MAPPABLE_TYPE`` annotation covered by ``t``.

     Args:
         cas: Document to search.
         t: Covering annotation passed to ``cas.select_covered``.

     Raises:
         StopIteration: If ``t`` covers no annotation of that type.
     """
     covered = cas.select_covered(FeatureExtractor.MAPPABLE_TYPE, t)
     # iter() makes this work whether select_covered yields lazily or returns
     # a list; a bare next() on a list would raise TypeError.
     return next(iter(covered))