def test_extract_nicknames_from_after_symbols():
    """Check that a noun phrase following a symbol becomes its nickname.

    The sentence holds two SYMBOL placeholders, each immediately followed by
    a phrase ("dense layers", "learning rate"). Two pairs are expected, in
    sentence order, each linking the symbol's TeX to the phrase after it.
    """
    text = "The architecture consists of SYMBOL dense layers trained with SYMBOL learning rate."
    # Keys coincide with the character offsets of the SYMBOL placeholders in `text`.
    symbol_texs = {29: "L_d", 62: r"\alpha"}
    tagged_tokens = [
        ("The", "DT"),
        ("architecture", "NN"),
        ("consists", "VBZ"),
        ("of", "IN"),
        ("SYMBOL", "NN"),
        ("dense", "JJ"),
        ("layers", "NNS"),
        ("trained", "VBN"),
        ("with", "IN"),
        ("SYMBOL", "NN"),
        ("learning", "NN"),
        ("rate", "NN"),
        (".", "."),
    ]
    # Transpose (token, tag) pairs into one tuple of tokens and one of tags.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(text, tokens, pos, symbol_texs)

    expected = [
        ("L_d", "dense layers"),
        (r"\alpha", "learning rate"),
    ]
    assert len(pairs) == 2
    for index, (term, definition) in enumerate(expected):
        assert pairs[index].term_text == term
        assert pairs[index].definition_text == definition
def test_extract_nicknames_from_before_symbols():
    """Check that a noun preceding a symbol becomes its nickname.

    Each SYMBOL placeholder is directly preceded by a noun ("policy",
    "timestep"). Two pairs are expected, in sentence order, each linking the
    symbol's TeX to the noun in front of it.
    """
    text = "The agent acts with a policy SYMBOL in each timestep SYMBOL."
    # Keys coincide with the character offsets of the SYMBOL placeholders in `text`.
    symbol_texs = {29: r"\pi", 53: "t"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("a", "DT"),
        ("policy", "NN"),
        ("SYMBOL", "NN"),
        ("in", "IN"),
        ("each", "DT"),
        ("timestep", "NN"),
        ("SYMBOL", "NN"),
        (".", "."),
    ]
    # Transpose (token, tag) pairs into one tuple of tokens and one of tags.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(text, tokens, pos, symbol_texs)

    expected = [
        (r"\pi", "policy"),
        ("t", "timestep"),
    ]
    assert len(pairs) == 2
    for index, (term, definition) in enumerate(expected):
        assert pairs[index].term_text == term
        assert pairs[index].definition_text == definition
def test_extract_nicknames_symbols_parentheses():
    """Check that a parenthesized symbol is paired with the noun before it.

    The SYMBOL placeholder is wrapped in "(" / ")" tokens right after
    "policy"; exactly one pair linking the symbol to "policy" is expected.
    """
    text = "The agent acts with policy (SYMBOL)."
    # Key coincides with the character offset of the SYMBOL placeholder in `text`.
    symbol_texs = {28: r"\pi"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("policy", "NN"),
        ("(", "-LRB-"),
        ("SYMBOL", "NN"),
        (")", "-RRB-"),
        (".", "."),
    ]
    # Transpose (token, tag) pairs into one tuple of tokens and one of tags.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(text, tokens, pos, symbol_texs)

    assert len(pairs) == 1
    only_pair = pairs[0]
    assert only_pair.term_text == r"\pi"
    assert only_pair.definition_text == "policy"
def test_extract_nicknames_symbols_filter():
    """Check that two adjacent symbols yield no nickname pairs.

    The only neighbor of each SYMBOL placeholder that could serve as a
    nickname is the other placeholder, so the result is expected to be empty.
    """
    text = "The agent acts with SYMBOL SYMBOL."
    # Keys coincide with the character offsets of the SYMBOL placeholders in `text`.
    symbol_texs = {20: r"\pi", 27: "p"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("SYMBOL", "NN"),
        ("SYMBOL", "NN"),
        (".", "."),
    ]
    # Transpose (token, tag) pairs into one tuple of tokens and one of tags.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(text, tokens, pos, symbol_texs)

    assert len(pairs) == 0
def test_extract_nickname_for_th_index_pattern():
    """Check that an ordinal "SYMBOLth" token is paired with the noun after it.

    The placeholder appears fused with a "th" suffix ("SYMBOLth") and tagged
    as an adjective; exactly one pair linking the symbol to "timestep" is
    expected.
    """
    text = "This process repeats for every SYMBOLth timestep."
    # Key coincides with the character offset of the SYMBOL placeholder in `text`.
    symbol_texs = {31: "k"}
    tagged_tokens = [
        ("This", "DT"),
        ("process", "NN"),
        # NOTE(review): "repeats" is tagged NNS although it is used as a verb
        # here; presumably this mirrors real tagger output - confirm before
        # changing it.
        ("repeats", "NNS"),
        ("for", "IN"),
        ("every", "DT"),
        ("SYMBOLth", "JJ"),
        ("timestep", "NN"),
        (".", "."),
    ]
    # Transpose (token, tag) pairs into one tuple of tokens and one of tags.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(text, tokens, pos, symbol_texs)

    assert len(pairs) == 1
    only_pair = pairs[0]
    assert only_pair.term_text == "k"
    assert only_pair.definition_text == "timestep"
def test_extract_nicknames_symbols_separated_by_colon():
    """Check that a noun separated from a symbol by a colon is its nickname.

    The sentence reads "SYMBOL : policy"; exactly one pair linking the symbol
    to "policy" is expected despite the intervening ":" token.
    """
    text = "The agent acts with SYMBOL : policy."
    # Key coincides with the character offset of the SYMBOL placeholder in `text`.
    symbol_texs = {20: r"\pi"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("SYMBOL", "NN"),
        (":", ":"),
        ("policy", "NN"),
        (".", "."),
    ]
    # Transpose (token, tag) pairs into one tuple of tokens and one of tags.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(text, tokens, pos, symbol_texs)

    assert len(pairs) == 1
    only_pair = pairs[0]
    assert only_pair.term_text == r"\pi"
    assert only_pair.definition_text == "policy"