def neg_keywords(c):
    """Vote FALSE when the gain mention aligns with "bandwidth" but not "gain".

    Collects the lower-cased horizontally aligned n-grams of ``c.gain``.

    Returns:
        ``FALSE`` if those n-grams overlap ``["bandwidth"]`` and do not
        overlap ``["gain"]``; ``ABSTAIN`` otherwise.
    """
    aligned = set(get_horz_ngrams(c.gain, lower=True))
    # Guard clause: without "bandwidth" nearby there is nothing to vote on,
    # and a co-occurring "gain" cancels the negative vote.
    if not overlap(["bandwidth"], aligned) or overlap(["gain"], aligned):
        return ABSTAIN
    return FALSE
def hertz_units(attr):
    """Decide whether ``attr`` sits next to a hertz unit and a gain keyword.

    The mention is rejected when:
      * its own cell contains both the tokens ``"f"`` and ``"="``;
      * its stripped span text is exactly ``"0"``;
      * its row contains any of the filter keywords.

    Otherwise it is accepted only if the n-grams to its right and in the
    rows within ``spread=[-2, 2]`` contain both a unit token and a keyword.

    Returns:
        ``True`` if the mention is accepted, ``False`` otherwise.
    """
    unit_tokens = ["mhz", "khz"]
    gain_tokens = [
        "product",
        "gain",
        "gain",
        "unity",
        "bandwidth",
        "gbp",
        "gbw",
        "gbwp",
    ]
    reject_tokens = ["-3 db", "maximum", "minimum", "impedance"]

    nearby = set(get_right_ngrams(attr, n_max=1, lower=True))
    nearby.update(get_row_ngrams(attr, n_max=1, spread=[-2, 2], lower=True))
    in_cell = set(get_cell_ngrams(attr, n_max=1, lower=True))

    # A cell holding both "f" and "=" is rejected outright.
    if {"f", "="} <= in_cell:
        return False
    if attr.get_span().strip() == "0":
        return False
    if overlap(reject_tokens, get_row_ngrams(attr, n_max=1, lower=True)):
        return False
    return bool(overlap(unit_tokens, nearby) and overlap(gain_tokens, nearby))
def pos_gain_keywords(c):
    """Vote TRUE when a "typ" column header and a hertz unit both align.

    Returns:
        ``TRUE`` if the lower-cased vertical unigrams of ``c.gain`` overlap
        ``["typ", "typ."]`` and its lower-cased row n-grams overlap
        ``["khz", "mhz"]``; ``ABSTAIN`` otherwise.
    """
    column_tokens = set(get_vert_ngrams(c.gain, n_max=1, lower=True))
    row_tokens = set(get_row_ngrams(c.gain, lower=True))
    if not overlap(["typ", "typ."], column_tokens):
        return ABSTAIN
    if not overlap(["khz", "mhz"], row_tokens):
        return ABSTAIN
    return TRUE
def LF_part_ce_keywords_horz_prefix(c):
    """Vote TRUE when CE keywords and a part-name prefix align horizontally.

    All three conditions are evaluated against the horizontally aligned
    n-grams of ``c[1]``:
      1. they overlap ``_CE_KEYWORDS`` / ``_CE_ABBREVS``;
      2. the lower-cased span of ``c.part`` starts with at least one of them;
      3. they do not overlap ``_NON_CEV_KEYWORDS``.

    Returns:
        ``TRUE`` if all conditions hold, ``ABSTAIN`` otherwise.
    """
    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS), get_horz_ngrams(c[1])):
        return ABSTAIN

    part_text = c.part.context.get_span().lower()
    if not any(part_text.startswith(ngram) for ngram in get_horz_ngrams(c[1])):
        return ABSTAIN

    if overlap(_NON_CEV_KEYWORDS, get_horz_ngrams(c[1])):
        return ABSTAIN
    return TRUE
def LF_part_ce_keywords_in_row_prefix(c):
    """Vote TRUE when CE keywords and a part-name prefix share the row.

    Row n-grams (up to trigrams) of ``c[1]`` are used throughout. The vote
    is TRUE only if:
      1. the row overlaps ``_CE_KEYWORDS`` / ``_CE_ABBREVS``;
      2. the lower-cased span of ``c.part`` starts with one of the row
         n-grams that survive ``_filter_non_parts``;
      3. the row does not overlap ``_NON_CEV_KEYWORDS``;
      4. ``LF_current_in_row(c)`` is falsy.

    Returns:
        ``TRUE`` if all conditions hold, ``ABSTAIN`` otherwise.
    """
    part_like = _filter_non_parts(get_row_ngrams(c[1], n_max=3))

    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS),
                   get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN

    part_text = c.part.context.get_span().lower()
    if not any(part_text.startswith(ngram) for ngram in part_like):
        return ABSTAIN

    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN
    if LF_current_in_row(c):
        return ABSTAIN
    return TRUE
def LF_ce_keywords_not_part_in_row_col_prefix(c):
    """Vote TRUE for a CE-keyword row with no part-like n-grams around it.

    The case-preserved column and row n-grams (up to trigrams) of ``c[1]``
    are merged and passed through ``_filter_non_parts``. The vote is TRUE
    only if:
      1. ``same_table(c)`` is falsy;
      2. the row n-grams of ``c[1]`` overlap ``_CE_KEYWORDS`` /
         ``_CE_ABBREVS``;
      3. no n-gram survived ``_filter_non_parts``;
      4. neither the row of ``c.part`` nor the row of ``c[1]`` overlaps
         ``_NON_CEV_KEYWORDS``;
      5. ``LF_current_in_row(c)`` is falsy.

    Returns:
        ``TRUE`` if all conditions hold, ``ABSTAIN`` otherwise.
    """
    surrounding = set(get_col_ngrams(c[1], n_max=3, lower=False))
    surrounding |= set(get_row_ngrams(c[1], n_max=3, lower=False))
    part_like = _filter_non_parts(surrounding)

    if same_table(c):
        return ABSTAIN
    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS),
                   get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN
    if len(part_like) != 0:
        return ABSTAIN
    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c.part, n_max=3)):
        return ABSTAIN
    if overlap(_NON_CEV_KEYWORDS, get_row_ngrams(c[1], n_max=3)):
        return ABSTAIN
    if LF_current_in_row(c):
        return ABSTAIN
    return TRUE
def pos_gain_header_unit(c):
    """Vote TRUE when gain header, hertz unit and "typ" column all align.

    The vote is TRUE only if:
      1. the lower-cased horizontal unigrams of ``c.gain`` overlap
         ``["gain", "unity"]``;
      2. a cell to the RIGHT of ``c.gain`` (within ``dist=5``) contains
         ``"mhz"`` or ``"khz"``, compared case-insensitively;
      3. the lower-cased vertical unigrams overlap ``["typ", "typ."]``.

    Returns:
        ``TRUE`` if all conditions hold, ``ABSTAIN`` otherwise.
    """
    horz_ngrams = set(get_horz_ngrams(c.gain, n_max=1, lower=True))
    vert_ngrams = set(get_vert_ngrams(c.gain, n_max=1, lower=True))
    # The neighbor n-grams are requested with lower=False, so they keep their
    # original case ("MHz", "kHz"). Lower-case them here; otherwise the
    # comparison against the lower-case unit list below can never succeed
    # for conventionally-capitalised units.
    right_ngrams = {
        x[0].lower()
        for x in get_neighbor_cell_ngrams(
            c.gain, n_max=1, dist=5, directions=True, lower=False)
        if x[-1] == "RIGHT"
    }
    if (overlap(["gain", "unity"], horz_ngrams)
            and overlap(["mhz", "khz"], right_ngrams)
            and overlap(["typ", "typ."], vert_ngrams)):
        return TRUE
    return ABSTAIN
def pos_gain(c):
    """Vote TRUE when the row of the gain mention contains "gain".

    Returns:
        ``TRUE`` if the lower-cased row n-grams of ``c.gain`` overlap
        ``["gain"]``; ``ABSTAIN`` otherwise.
    """
    row_ngrams = set(get_row_ngrams(c.gain, lower=True))
    if overlap(["gain"], row_ngrams):
        return TRUE
    # The ABSTAIN label must be returned explicitly; a bare `ABSTAIN`
    # expression would make the function fall through and return None.
    return ABSTAIN
def LF_part_ce_keywords_in_rows_cols_prefix_1(c):
    """Vote TRUE when CE keywords and a part-name prefix align with ``c[1]``.

    The horizontally and vertically aligned n-grams of ``c[1]`` are merged.
    The vote is TRUE only if that merged set overlaps ``_CE_KEYWORDS`` /
    ``_CE_ABBREVS`` and the lower-cased span of ``c.part`` starts with one
    of the n-grams that survive ``_filter_non_parts``.

    Returns:
        ``TRUE`` if both conditions hold, ``ABSTAIN`` otherwise.
    """
    aligned = set(get_horz_ngrams(c[1])) | set(get_vert_ngrams(c[1]))
    part_like = _filter_non_parts(aligned)

    if not overlap(_CE_KEYWORDS.union(_CE_ABBREVS), aligned):
        return ABSTAIN

    part_text = c.part.context.get_span().lower()
    if any(part_text.startswith(ngram) for ngram in part_like):
        return TRUE
    return ABSTAIN
def LF_complement_left_row(c):
    """Return -1 if "complement(ary)" appears in the part's row or to its left.

    Looks at the row n-grams of ``c.part`` together with the n-grams to its
    left (``window=10``).

    Returns:
        ``-1`` if either source contains ``"complement"`` or
        ``"complementary"``; ``0`` otherwise.
    """
    nearby = chain(get_row_ngrams(c.part), get_left_ngrams(c.part, window=10))
    if overlap(["complement", "complementary"], nearby):
        return -1
    return 0
def LF_polarity_complement(c):
    """Vote FALSE when "complement(ary)" appears in or near the sentence.

    Looks at the n-grams of the sentence containing ``c[1]`` together with
    those of its neighboring sentences.

    Returns:
        ``FALSE`` if either source contains ``"complement"`` or
        ``"complementary"``; ``ABSTAIN`` otherwise.
    """
    context_ngrams = chain(
        get_sentence_ngrams(c[1]),
        get_neighbor_sentence_ngrams(c[1]),
    )
    if overlap(["complement", "complementary"], context_ngrams):
        return FALSE
    return ABSTAIN
def LF_ce_keywords_no_part_horz(c):
    """Vote TRUE for CE keywords with no alphanumeric code aligned horizontally.

    Abstains as soon as any horizontally aligned n-gram of ``c[1]``, once
    upper-cased, matches the pattern "digits then letters" or "letters then
    digits", optionally followed by more alphanumerics. Otherwise votes TRUE
    if the same n-grams overlap ``_CE_KEYWORDS`` / ``_CE_ABBREVS``.

    Returns:
        ``TRUE`` or ``ABSTAIN`` as described above.
    """
    code_pattern = "^([0-9]+[a-zA-Z]+|[a-zA-Z]+[0-9]+)[0-9a-zA-Z]*$"
    if any(re.match(code_pattern, ngram.upper())
           for ngram in get_horz_ngrams(c[1])):
        return ABSTAIN
    if overlap(_CE_KEYWORDS.union(_CE_ABBREVS), get_horz_ngrams(c[1])):
        return TRUE
    return ABSTAIN
def LF_ce_keywords_no_part_in_rows(c):
    """Vote TRUE for CE keywords with no alphanumeric code in the row.

    Abstains as soon as any case-preserved row n-gram (up to trigrams) of
    ``c[1]``, once upper-cased, matches the pattern "digits then letters"
    or "letters then digits", optionally followed by more alphanumerics.
    Otherwise votes TRUE if the row n-grams overlap ``_CE_KEYWORDS`` /
    ``_CE_ABBREVS``.

    Returns:
        ``TRUE`` or ``ABSTAIN`` as described above.
    """
    code_pattern = "^([0-9]+[a-zA-Z]+|[a-zA-Z]+[0-9]+)[0-9a-zA-Z]*$"
    if any(re.match(code_pattern, ngram.upper())
           for ngram in get_row_ngrams(c[1], n_max=3, lower=False)):
        return ABSTAIN
    if overlap(_CE_KEYWORDS.union(_CE_ABBREVS),
               get_row_ngrams(c[1], n_max=3)):
        return TRUE
    return ABSTAIN
def ce_v_max_conditions(attr):
    """Check whether ``attr`` sits in a CE-keyword row unrelated to the base.

    Returns:
        ``False`` if the row unigrams of ``attr`` do not overlap
        ``ce_keywords`` / ``ce_abbrevs``, or if the lower-cased sentence text
        contains ``"vcb"`` or ``"base"``; ``True`` otherwise.
    """
    row_tokens = set(get_row_ngrams(attr, n_max=1))
    if not overlap(ce_keywords.union(ce_abbrevs), row_tokens):
        return False

    sentence_text = attr.sentence.text.lower()
    # Substring test, not token test: "base" also matches inside longer words.
    return not ("vcb" in sentence_text or "base" in sentence_text)
def LF_complement_left_row(c):
    """Vote FALSE if "complement(ary)" appears in the part's row or to its left.

    Looks at the row n-grams of ``c.part`` together with the n-grams to its
    left (``window=10``).

    Returns:
        ``FALSE`` if either source contains ``"complement"`` or
        ``"complementary"``; ``ABSTAIN`` otherwise.
    """
    nearby = chain(get_row_ngrams(c.part), get_left_ngrams(c.part, window=10))
    if overlap(["complement", "complementary"], nearby):
        return FALSE
    return ABSTAIN
def LF_collector_aligned(c):
    """Vote FALSE when a collector keyword is aligned with ``c[1]``.

    Returns:
        ``FALSE`` if the aligned n-grams of ``c[1]`` overlap the collector
        keyword list; ``ABSTAIN`` otherwise.
    """
    collector_terms = [
        "collector",
        "collector-current",
        "collector-base",
        "collector-emitter",
    ]
    aligned = list(get_aligned_ngrams(c[1]))
    if overlap(collector_terms, aligned):
        return FALSE
    return ABSTAIN
def LF_collector_aligned(c):
    """Return -1 when a collector keyword is aligned with ``c.temp``.

    Returns:
        ``-1`` if the aligned n-grams of ``c.temp`` overlap the collector
        keyword list; ``0`` otherwise.
    """
    collector_terms = [
        "collector",
        "collector-current",
        "collector-base",
        "collector-emitter",
    ]
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(collector_terms, aligned):
        return -1
    return 0
def LF_voltage_row_part(c):
    """Vote FALSE when a voltage keyword is aligned with ``c.temp``.

    Returns:
        ``FALSE`` if the aligned n-grams of ``c.temp`` overlap the voltage
        keyword list; ``ABSTAIN`` otherwise.
    """
    # NOTE(review): the function name says "part" but the mention inspected
    # is c.temp — confirm which one is intended.
    voltage_terms = ["voltage", "cbo", "ceo", "ebo", "v"]
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(voltage_terms, aligned):
        return FALSE
    return ABSTAIN
def LF_bad_keywords_in_row(c):
    """Vote FALSE when the row of ``c.volt`` contains a disqualifying keyword.

    Returns:
        ``FALSE`` if the row n-grams of ``c.volt`` overlap
        ``["continuous", "cut-off", "gain", "breakdown"]``; ``ABSTAIN``
        otherwise.
    """
    bad_terms = ["continuous", "cut-off", "gain", "breakdown"]
    if overlap(bad_terms, get_row_ngrams(c.volt)):
        return FALSE
    return ABSTAIN
def LF_collector_aligned(c):
    """Vote FALSE when a collector keyword is aligned with the temp mention.

    Returns:
        ``FALSE`` if the aligned n-grams of ``c.temp`` overlap the collector
        keyword list; ``ABSTAIN`` otherwise.
    """
    collector_terms = [
        "collector",
        "collector-current",
        "collector-base",
        "collector-emitter",
    ]
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(collector_terms, aligned):
        return FALSE
    return ABSTAIN
def LF_not_temp_relevant(c):
    """Vote FALSE when no storage-temperature keyword aligns with ``c.temp``.

    Returns:
        ``ABSTAIN`` if the aligned n-grams of ``c.temp`` overlap
        ``["storage", "temperature", "tstg", "stg", "ts"]``; ``FALSE``
        otherwise.
    """
    relevant_terms = ["storage", "temperature", "tstg", "stg", "ts"]
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(relevant_terms, aligned):
        return ABSTAIN
    return FALSE
def current_units(attr):
    """Decide whether ``attr`` sits next to a current unit and a supply keyword.

    The mention is rejected when its stripped span text is exactly ``"0"``
    or when its row contains any of the filter keywords. Otherwise it is
    accepted only if the n-grams to its right and in the rows within
    ``spread=[-5, 5]`` contain both a unit token and a keyword.

    Returns:
        ``True`` if the mention is accepted, ``False`` otherwise.
    """
    # NOTE: These two symbols for mu are unique, not duplicates.
    unit_tokens = ["ma", "μa", "ua", "µa", "\uf06da"]
    supply_tokens = ["supply", "quiescent", "iq", "is", "idd", "icc"]
    reject_tokens = ["offset", "bias", "logic", "shutdown"]

    nearby = set(get_right_ngrams(attr, n_max=1, lower=True))
    nearby.update(get_row_ngrams(attr, n_max=1, spread=[-5, 5], lower=True))

    if attr.get_span().strip() == "0":
        return False
    if overlap(reject_tokens, get_row_ngrams(attr, n_max=1, lower=True)):
        return False
    return bool(overlap(unit_tokens, nearby) and overlap(supply_tokens, nearby))
def neg_gain_keywords_in_right_cell(c):
    """Vote FALSE when no hertz unit appears in the cells to the right.

    Gathers the case-preserved unigrams of neighbor cells of ``c[0]``
    (``dist=5``) whose direction is ``"RIGHT"``.

    Returns:
        ``ABSTAIN`` if those unigrams overlap ``["kHz", "MHz", "GHz"]``;
        ``FALSE`` otherwise.
    """
    neighbors = get_neighbor_cell_ngrams(
        c[0], n_max=1, dist=5, directions=True, lower=False)
    to_the_right = {entry[0] for entry in neighbors if entry[-1] == "RIGHT"}
    if overlap(["kHz", "MHz", "GHz"], to_the_right):
        return ABSTAIN
    return FALSE
def LF_polarity_transistor_type(c):
    """Vote TRUE when a transistor-type keyword appears in or near the sentence.

    Looks at the n-grams of the sentence containing ``c[1]`` together with
    those of its neighboring sentences.

    Returns:
        ``TRUE`` if either source overlaps the transistor-type keyword list;
        ``ABSTAIN`` otherwise.
    """
    type_terms = [
        "silicon",
        "power",
        "darlington",
        "epitaxial",
        "low noise",
        "ampl/switch",
        "switch",
        "surface",
        "mount",
    ]
    context_ngrams = chain(
        get_sentence_ngrams(c[1]),
        get_neighbor_sentence_ngrams(c[1]),
    )
    if overlap(type_terms, context_ngrams):
        return TRUE
    return ABSTAIN
def neg_gain_keywords_in_column(c):
    """Vote FALSE when the column of the gain mention has a disqualifying token.

    Returns:
        ``FALSE`` if the lower-cased column unigrams of ``c.gain`` overlap
        the keyword list below; ``ABSTAIN`` otherwise.
    """
    bad_column_terms = [
        "max",
        "min",
        "test",
        "condition",
        "conditions",
        "vgn",
        "f",
        "-3",
        "db",
        "dbc",
    ]
    column_tokens = set(get_col_ngrams(c.gain, n_max=1, lower=True))
    return FALSE if overlap(bad_column_terms, column_tokens) else ABSTAIN
def neg_gain_keywords_in_row(c):
    """Vote FALSE when the row of the gain mention has a disqualifying token.

    Returns:
        ``FALSE`` if the lower-cased row unigrams of ``c.gain`` overlap the
        keyword list below; ``ABSTAIN`` otherwise.
    """
    bad_row_terms = [
        "small",
        "full",
        "flat",
        "current",
        "thd",
        "signal",
        "flatness",
        "input",
        "noise",
        "f=",
        "f",
        "-3",
        "power",
        "db",
        "dbm",
        "output",
        # The correct spelling was missing, so correctly-spelled rows were
        # never filtered. The misspelling is kept in case a source document
        # contains it.
        "impedance",
        "impedence",
        "delay",
        "capacitance",
        "range",
        "ratio",
        "dbc",
        "temperature",
        "common",
        "voltage",
    ]
    row_ngrams = set(get_row_ngrams(c.gain, n_max=1, lower=True))
    if overlap(bad_row_terms, row_ngrams):
        return FALSE
    return ABSTAIN
def LF_typ_row(c):
    """Return -1 when the row of ``c.temp`` contains "typ" or "typ.".

    Returns:
        ``-1`` if the row n-grams of ``c.temp`` overlap ``["typ", "typ."]``;
        ``0`` otherwise.
    """
    row_tokens = list(get_row_ngrams(c.temp))
    if overlap(["typ", "typ."], row_tokens):
        return -1
    return 0
def LF_voltage_row_temp(c):
    """Return -1 when a voltage keyword is aligned with ``c.temp``.

    Returns:
        ``-1`` if the aligned n-grams of ``c.temp`` overlap
        ``["voltage", "cbo", "ceo", "ebo", "v"]``; ``0`` otherwise.
    """
    voltage_terms = ["voltage", "cbo", "ceo", "ebo", "v"]
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(voltage_terms, aligned):
        return -1
    return 0
def LF_current_aligned(c):
    """Return -1 when a current keyword is aligned with ``c.temp``.

    Returns:
        ``-1`` if the aligned n-grams of ``c.temp`` overlap
        ``["current", "dc", "ic"]``; ``0`` otherwise.
    """
    current_terms = ["current", "dc", "ic"]
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(current_terms, aligned):
        return -1
    return 0
def LF_test_condition_aligned(c):
    """Return -1 when "test" or "condition" is aligned with ``c.temp``.

    Returns:
        ``-1`` if the aligned n-grams of ``c.temp`` overlap
        ``["test", "condition"]``; ``0`` otherwise.
    """
    aligned = list(get_aligned_ngrams(c.temp))
    if overlap(["test", "condition"], aligned):
        return -1
    return 0