def test_parse_postscript_no_links(self):
    """
    Check that postscript carrying no links parses to an empty link list.

    The parser is invoked with BIT_RWALL and BIT_CAPS set and BIT_STRIP cleared.
    """
    options = (BIT_RWALL | BIT_CAPS) & ~BIT_STRIP
    # Only the link list matters here; the token list is discarded.
    _, links = parse_postscript(self.post_no_links, options)
    self.assertEqual(0, len(links))
def test_parse_postscript_alice_bug_002(self):
    """
    Gutenberg Children bug regression: the `alice_bug_002` fixture must parse
    into exactly 29 tokens.
    """
    # No option bits are set; masking with ~BIT_STRIP just makes explicit
    # that the stripping bit stays cleared (the result is always zero).
    options = 0 & ~BIT_STRIP
    tokens, _ = parse_postscript(alice_bug_002, options)
    # The token list itself is passed as the failure message for diagnostics.
    self.assertEqual(29, len(tokens), tokens)
def test_parse_postscript_gutenchildren_bug(self):
    """
    Token-count regression for a bug found in the Gutenberg Children corpus:
    the `gutenberg_children_bug` fixture must parse into 18 tokens.
    """
    # The parser is called with no option bits set at all.
    no_options = 0
    tokens, _ = parse_postscript(gutenberg_children_bug, no_options)
    self.assertEqual(18, len(tokens))
def test_parse_postscript_gutenchildren_bug_002(self):
    """
    Check token and link counts for the `gutenberg_children_bug_002` fixture.

    The fixture is parsed with BIT_NO_LWALL, BIT_NO_PERIOD and BIT_STRIP set;
    12 tokens and 6 links are expected.
    """
    options = BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP
    tokens, links = parse_postscript(gutenberg_children_bug_002, options)
    # The parsed data is attached to the assertions instead of being printed
    # unconditionally, so it only shows up when a check actually fails.
    self.assertEqual(12, len(tokens), str(tokens))
    self.assertEqual(6, len(links), str(links))
def test_parse_postscript_all_walls(self):
    """
    Check parse metrics for postscript that contains both walls.

    The tokens parsed from `self.post_all_walls` are fed to `parse_metrics`,
    whose ratios must report a completely parsed sentence.
    """
    options = (BIT_RWALL | BIT_CAPS) & ~BIT_STRIP
    # Metrics are computed from the tokens only; the links are not needed.
    tokens, _ = parse_postscript(self.post_all_walls, options)
    metrics = parse_metrics(tokens)
    self.assertEqual(1.0, metrics.completely_parsed_ratio)
    self.assertEqual(0.0, metrics.completely_unparsed_ratio)
    self.assertEqual(1.0, metrics.average_parsed_ratio)
def test_get_link_set(self):
    """
    Test for link extraction according to set options.

    `self.post_all_walls` is parsed and passed through `get_link_set` with
    BIT_NO_LWALL, BIT_NO_PERIOD, BIT_STRIP and BIT_PARSE_QUALITY set; the
    result must equal the expected set of (left, right) index pairs.
    """
    # Fixture for reference (presumably still matches self.post_all_walls):
    # post_all_walls = "[(LEFT-WALL)(Dad[!])(was.v-d)(not.e)(a)(parent.n)(before)(.)(RIGHT-WALL)]" \
    #                  "[[0 7 2 (Xp)][0 1 0 (Wd)][1 2 0 (Ss*s)][2 5 1 (Osm)][2 3 0 (EBm)]" \
    #                  "[4 5 0 (Ds**c)][5 6 0 (Mp)][7 8 0 (RW)]][0]"
    # Of the links listed there, the ones touching index 0, 7 or 8 are not
    # part of the expected set.
    expected_set = {(1, 2), (2, 5), (2, 3), (4, 5), (5, 6)}
    options = BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP | BIT_PARSE_QUALITY
    tokens, links = parse_postscript(self.post_all_walls, options)
    result_set = get_link_set(tokens, links, options)
    # assertEqual passes and fails exactly like `assertTrue(a == b)`, but on
    # failure it reports both values rather than "False is not true".
    self.assertEqual(expected_set, result_set)
def test_parse_tokens_sharp(self):
    """
    Test for proper parsing of sharp sign prefixes.

    `sharp_sign_ps_linkages` is parsed with only BIT_STRIP set; the tokens
    and the set of links must match the `sharp_sign_tokens` and
    `sharp_sign_links` fixtures.
    """
    parsed_tokens, parsed_links = parse_postscript(sharp_sign_ps_linkages, BIT_STRIP)
    # Compare lengths first so a size mismatch is reported on its own.
    self.assertEqual(len(sharp_sign_tokens), len(parsed_tokens))
    self.assertEqual(sharp_sign_tokens, parsed_tokens)
    # Links are compared as a set, so their order is irrelevant.
    self.assertEqual(sharp_sign_links, set(parsed_links))
def test_parse_postscript_explosion_no_linkages(self):
    """
    Check parsing of the `explosion_no_linkages` fixture.

    The fixture has its newlines removed and is parsed with BIT_NO_LWALL,
    BIT_NO_PERIOD and BIT_STRIP set; 27 tokens and no links are expected.
    """
    options = BIT_NO_LWALL | BIT_NO_PERIOD | BIT_STRIP
    postscript = explosion_no_linkages.replace("\n", "")
    tokens, links = parse_postscript(postscript, options)
    # The parsed data is attached to the assertions instead of being printed
    # unconditionally, so it only shows up when a check actually fails.
    self.assertEqual(27, len(tokens), str(tokens))
    self.assertEqual(0, len(links), str(links))
def test_parse_postscript_alice_bug_001(self):
    """
    Regression test for the `alice_bug_001` fixture: it must parse into 15
    tokens, and both indices of every link must be below that count.
    """
    expected_token_count = 15
    # No option bits are set; masking with ~BIT_STRIP just makes explicit
    # that the stripping bit stays cleared (the result is always zero).
    options = 0 & ~BIT_STRIP
    tokens, links = parse_postscript(alice_bug_001, options)
    self.assertEqual(expected_token_count, len(tokens))
    for link in links:
        both_in_range = link[0] < expected_token_count and link[1] < expected_token_count
        # The offending link is reported as the failure message.
        self.assertTrue(both_in_range, str(link))