def LF_complement_left_row(c):
    """Vote -1 when a "complement" keyword overlaps the part's nearby n-grams.

    The candidate n-grams are those produced by ``get_row_ngrams(c.part)``
    followed by ``get_left_ngrams(c.part, window=10)``. Returns -1 when
    ``overlap`` reports a match against the keywords, otherwise 0.
    """
    keywords = ["complement", "complementary"]
    nearby_ngrams = chain.from_iterable(
        [get_row_ngrams(c.part), get_left_ngrams(c.part, window=10)]
    )
    if overlap(keywords, nearby_ngrams):
        return -1
    return 0
def get_word_feats(span):
    """Yield the unary word features of ``span``, caching them per span.

    Features are stored in ``unary_word_feats`` under ``span.stable_id``:

    * ``CONTAINS_WORDS_[ngram]`` for every 1- and 2-gram of the span's words,
    * ``LEFT_WORDS_[ngram]`` / ``RIGHT_WORDS_[ngram]`` for n-grams (up to
      2-grams) returned by ``get_left_ngrams`` / ``get_right_ngrams`` with
      the window configured in ``settings``.

    :param span: the span mention to featurize; must expose ``stable_id``
        and ``get_attrib_tokens``.
    :return: a generator over the feature strings cached for ``span``.
    """
    attrib = "words"

    if span.stable_id not in unary_word_feats:
        unary_word_feats[span.stable_id] = set()

        # Features are added to the per-span set, not to the cache mapping
        # itself (the mapping has no ``add`` and the set would stay empty).
        for ngram in tokens_to_ngrams(
            span.get_attrib_tokens(attrib), n_min=1, n_max=2
        ):
            feature = f"CONTAINS_{attrib.upper()}_[{ngram}]"
            unary_word_feats[span.stable_id].add(feature)

        for ngram in get_left_ngrams(
            span,
            window=settings["featurization"]["content"]["word_feature"]
            ["window"],
            n_max=2,
            attrib=attrib,
        ):
            feature = f"LEFT_{attrib.upper()}_[{ngram}]"
            unary_word_feats[span.stable_id].add(feature)

        for ngram in get_right_ngrams(
            span,
            window=settings["featurization"]["content"]["word_feature"]
            ["window"],
            n_max=2,
            attrib=attrib,
        ):
            feature = f"RIGHT_{attrib.upper()}_[{ngram}]"
            unary_word_feats[span.stable_id].add(feature)

    for f in unary_word_feats[span.stable_id]:
        yield f
def LF_negative_number_left(c):
    """Vote TRUE when a left n-gram of the temp mention starts with a negative number.

    Looks at ``get_left_ngrams(c.temp, window=4)`` and tests each n-gram
    against a minus sign, optional whitespace, then digits, anchored at the
    start of the n-gram. Returns ABSTAIN when nothing matches.
    """
    negative_number = re.compile(r"-\s*\d+")
    hits = [
        negative_number.match(ngram)
        for ngram in get_left_ngrams(c.temp, window=4)
    ]
    if any(hits):
        return TRUE
    return ABSTAIN
def LF_complement_left_row(c):
    """Return FALSE if the part mention's row or left ngrams hit a complement keyword.

    The ngrams checked are ``get_row_ngrams(c.part)`` followed by
    ``get_left_ngrams(c.part, window=10)``; ABSTAIN is returned when
    ``overlap`` finds no match.
    """
    complement_words = ["complement", "complementary"]
    candidate_ngrams = chain.from_iterable(
        [get_row_ngrams(c.part), get_left_ngrams(c.part, window=10)]
    )
    if overlap(complement_words, candidate_ngrams):
        return FALSE
    return ABSTAIN
def _get_word_feats(span: SpanMention) -> Iterator[str]:
    """Yield the cached unary word features for ``span``.

    On the first request for a given ``span.stable_id`` the feature set is
    built and stored in ``unary_word_feats``; later requests reuse it.
    The set contains:

    * ``CONTAINS_WORDS_[ngram]`` for each 1-/2-gram of the span's words,
    * ``LEFT_WORDS_[ngram]`` and ``RIGHT_WORDS_[ngram]`` for neighbouring
      n-grams (up to 2-grams) within the configured window,
    * ``SPAN_TYPE_[IMPLICIT]`` or ``SPAN_TYPE_[EXPLICIT]``,
    * ``STARTS_WITH_CAPITAL`` when the span text begins with an uppercase
      character,
    * ``LENGTH_<n>`` with the span's word count.

    :param span: the span mention to featurize.
    :return: a generator over the feature strings.
    """
    attrib = "words"
    key = span.stable_id

    if key not in unary_word_feats:
        # Register the set first so the cache entry exists while it is filled.
        feats = set()
        unary_word_feats[key] = feats

        feats.update(
            f"CONTAINS_{attrib.upper()}_[{ngram}]"
            for ngram in tokens_to_ngrams(
                span.get_attrib_tokens(attrib), n_min=1, n_max=2
            )
        )

        window = settings["featurization"]["textual"]["word_feature"]["window"]
        feats.update(
            f"LEFT_{attrib.upper()}_[{ngram}]"
            for ngram in get_left_ngrams(
                span, window=window, n_max=2, attrib=attrib
            )
        )
        feats.update(
            f"RIGHT_{attrib.upper()}_[{ngram}]"
            for ngram in get_right_ngrams(
                span, window=window, n_max=2, attrib=attrib
            )
        )

        feats.add((
            f"SPAN_TYPE_["
            f"{'IMPLICIT' if isinstance(span, ImplicitSpanMention) else 'EXPLICIT'}"
            f"]"))

        if span.get_span()[0].isupper():
            feats.add("STARTS_WITH_CAPITAL")

        feats.add(f"LENGTH_{span.get_num_words()}")

    yield from unary_word_feats[key]
def LF_to_left(c):
    """Vote 1 when "to" is among the temp mention's left n-grams, else 0.

    Only the n-grams from ``get_left_ngrams(c.temp, window=2)`` are checked.
    """
    left_ngrams = get_left_ngrams(c.temp, window=2)
    if "to" in left_ngrams:
        return 1
    return 0
def LF_to_left(c):
    """Vote TRUE when "to" is among the left n-grams of ``c[1]``, else ABSTAIN.

    Only the n-grams from ``get_left_ngrams(c[1], window=2)`` are checked.
    """
    left_ngrams = get_left_ngrams(c[1], window=2)
    if "to" in left_ngrams:
        return TRUE
    return ABSTAIN
def LF_negative_number_left(c):
    """Return TRUE if a left ngram of the temp mention begins with a negative number.

    Each ngram from ``get_left_ngrams(c.temp, window=4)`` is tested for a
    leading minus sign, optional whitespace, and digits; ABSTAIN is returned
    when none match.
    """
    pattern = re.compile(r"-\s*\d+")
    match_results = [
        pattern.match(ngram)
        for ngram in get_left_ngrams(c.temp, window=4)
    ]
    found_negative = any(match_results)
    return TRUE if found_negative else ABSTAIN
def LF_to_left(c):
    """Return TRUE if ``to`` is one of the temp mention's left ngrams.

    Only ngrams from ``get_left_ngrams(c.temp, window=2)`` are considered;
    ABSTAIN is returned otherwise.
    """
    preceding_ngrams = get_left_ngrams(c.temp, window=2)
    if "to" in preceding_ngrams:
        return TRUE
    return ABSTAIN