def test_rjg20():
    ''' Test that the fetched rjg20 index page matches the reference file line by line '''
    reference_path = "../data/rjg20-index.html"
    url = "https://people.bath.ac.uk/rjg20/index.html"

    # Load the reference lines first, then fetch the page, as two separate steps.
    with open(reference_path) as handle:
        reference_lines = handle.read().splitlines()

    fetched_lines = wt.open_page(url).splitlines()
    assert fetched_lines == reference_lines
def test_count_rjg20():
    ''' Test that count_words reports 679 words for the rjg20 index page '''
    url = "https://people.bath.ac.uk/rjg20/index.html"
    page_text = wt.open_page(url)
    word_total = wt.count_words(page_text)
    assert word_total == 679
def test_count_hamlet():
    ''' Test that count_words reports 32211 words for the Hamlet text '''
    hamlet_url = "http://www.gutenberg.org/cache/epub/2265/pg2265.txt"
    expected_total = 32211
    hamlet_text = wt.open_page(hamlet_url)
    assert wt.count_words(hamlet_text) == expected_total
def test_count_macbeth():
    ''' Test that count_words reports 20347 words for the Scottish play '''
    play_url = "http://www.gutenberg.org/cache/epub/2264/pg2264.txt"
    play_text = wt.open_page(play_url)
    counted = wt.count_words(play_text)
    assert counted == 20347
def test_occs_macbeth4():
    ''' Test occurrence counts of 'Macb.', 'Macd.' and 'Lady.' in the Scottish play '''
    play_url = "http://www.gutenberg.org/cache/epub/2264/pg2264.txt"
    # Each search word is paired with the count expected for it;
    # insertion order fixes the order of both lists below.
    expected_by_word = {
        "Macb.": 137,
        "Macd.": 58,
        "Lady.": 41,
    }
    search_words = list(expected_by_word.keys())
    expected_counts = list(expected_by_word.values())

    play_text = wt.open_page(play_url)
    assert wt.count_occs(play_text, search_words) == expected_counts
def test_occs_macbeth3():
    ''' Test occurrence counts of 'Macbeth' and 'Macduff' in the Scottish play '''
    play_url = "http://www.gutenberg.org/cache/epub/2264/pg2264.txt"
    search_words = ["Macbeth", "Macduff"]
    expected_counts = [70, 28]

    play_text = wt.open_page(play_url)
    found_counts = wt.count_occs(play_text, search_words)
    assert found_counts == expected_counts
def test_strip_macbeth_stop():
    ''' Test that splitting macbeth on 'Actus' with stop = n yields n pieces, for n from 1 to 5 '''
    play_url = "http://www.gutenberg.org/cache/epub/2264/pg2264.txt"
    separator = 'Actus'
    play_text = wt.open_page(play_url)

    # The stop value is also the number of pieces expected back.
    for stop_at in range(1, 6):
        pieces = wt.split_strip_text(play_text, separator, stop = stop_at)
        assert len(pieces) == stop_at
def test_split_macbeth():
    ''' Test that splitting macbeth on 'Actus' with no start or stop yields 6 pieces '''
    play_url = "http://www.gutenberg.org/cache/epub/2264/pg2264.txt"
    separator = 'Actus'
    play_text = wt.open_page(play_url)
    pieces = wt.split_strip_text(play_text, separator)
    assert len(pieces) == 6
def test_strip_macbeth_both():
    ''' Test split and strip macbeth on 'Actus' using both start and stop values '''
    play_url = "http://www.gutenberg.org/cache/epub/2264/pg2264.txt"
    separator = 'Actus'
    play_text = wt.open_page(play_url)

    # Every (start, stop) pair with 0 <= start < stop <= 5, in the same
    # order a nested loop over start then stop would visit them.
    bounds = [(first, last) for first in range(0, 6) for last in range(first + 1, 6)]

    for first, last in bounds:
        pieces = wt.split_strip_text(play_text, separator, start = first, stop = last)
        # A window from start to stop is expected to hold stop - start pieces.
        assert len(pieces) == last - first, str(first) + 'is i and j is ' + str(last)