def test_sanity():
    """Round-trip check: re-parsing the joined words yields the same words.

    The word list of a TextBody built from the book is joined with single
    spaces and fed into a second TextBody; both word lists must be equal.
    """
    # Close the file deterministically instead of leaking the handle.
    with open('moby_dick.txt') as book:
        text_a = TextBody(book.read())
    words_before = text_a.words[:]
    rejoined = ' '.join(text_a.words)
    text_b = TextBody(rejoined)
    # A two-argument assert_(a, b) only checks that `a` is truthy and uses
    # `b` as the failure message, so the two lists were never compared.
    assert words_before == text_b.words
 def test_average_words(self):
     """Two five-letter words give an average word length of 5."""
     expected_length = 5
     body = TextBody("white whale")
     observed_length = body.get_average_word_length()
     self.assertEqual(observed_length, expected_length)
예제 #3
0
def test_count_word_complex():
    """The summary text is expected to contain "white" exactly twice."""
    summary_counter = WordCounter(TextBody(MOBYDICK_SUMMARY))
    occurrences = summary_counter.count_word("white")
    assert_equal(occurrences, 2)
예제 #4
0
def test_count_word_simple():
    """A four-word sentence with "white" repeated counts it twice."""
    sentence_counter = WordCounter(TextBody("the white white whale"))
    occurrences = sentence_counter.count_word("white")
    assert_equal(occurrences, 2)
 def test_average_words_complex(self):
     """Average word length of the summary is about 4.0 (3 places)."""
     summary = TextBody(MOBYDICK_SUMMARY)
     average = summary.get_average_word_length()
     self.assertAlmostEqual(average, 4.0, places=3)
 def test_average_words(self):
     """Two five-letter words give an average word length of 5."""
     expected_length = 5
     body = TextBody("white whale")
     observed_length = body.get_average_word_length()
     self.assertEqual(observed_length, expected_length)
예제 #7
0
def test_word_number_text():
    """The summary paragraph is expected to count as 54 words."""
    expected_words = 54
    paragraph = TextBody(MOBYDICK_SUMMARY)
    assert_equal(paragraph.word_number, expected_words)
def test_biggest():
    """An entire book works."""
    # Close the file deterministically instead of leaking the handle.
    with open('moby_dick.txt') as book:
        text = TextBody(book.read())
    # NOTE(review): the original passed 200000 as the second argument of
    # assert_, i.e. as the failure message, so the number was never checked.
    # It is treated here as a lower bound for the book's word count --
    # confirm the intended threshold.
    assert text.word_number > 200000
예제 #9
0
def test_word_number_one():
    """A text holding a single token has a word count of one."""
    single = TextBody("one_word")
    word_total = single.word_number
    assert word_total == 1
예제 #10
0
 def setUp(self):
     """Build self.text and self.counter from the summary file before each test."""
     # Read through a context manager so the file handle is closed right
     # away instead of being left open for the duration of the test.
     with open('mobydick_summary.txt') as summary_file:
         summary = summary_file.read()
     self.text = TextBody(summary)
     self.counter = WordCounter(self.text)
#!/usr/bin/env python

from word_counter import TextBody
import sys

def get_top_words(text, n):
    '''Return the first n (count, word) tuples, highest count first.

    Occurrences of every entry in text.words are tallied; the resulting
    (count, word) tuples are sorted in descending order, so words with
    equal counts appear in descending word order.
    '''
    frequencies = {}
    for token in text.words:
        frequencies[token] = frequencies.get(token, 0) + 1

    ranked = sorted(
        ((count, token) for token, count in frequencies.items()),
        reverse=True
    )
    return ranked[:n]


if __name__ == '__main__':
    # Print the most frequent words of a text file, one "word count" per line.
    # The usage text is shown only when arguments are missing, instead of
    # being printed on every run followed by an IndexError.
    if len(sys.argv) < 3:
        print('usage:\npython word_report.py <filename> <number>')
        sys.exit(1)
    textfile = sys.argv[1]
    number = int(sys.argv[2])
    # Close the input file as soon as it has been read.
    with open(textfile) as infile:
        text = TextBody(infile.read())
    for count, word in get_top_words(text, number):
        print(word, count)

#!/usr/bin/env python
#
# example of a test generator
#

from nose.tools import assert_equal
from word_counter import TextBody, WordCounter

# Fixture text shared by all generated tests; read once at import time.
# The context manager closes the file handle instead of leaking it.
with open('mobydick_summary.txt') as summary_file:
    MOBYDICK_SUMMARY = summary_file.read()
TEXT = TextBody(MOBYDICK_SUMMARY)
COUNTER = WordCounter(TEXT)

# (word, expected count) pairs that are checked against COUNTER.
WORD_PAIRS = [('months', 1), ('whale', 5), ('captain', 4), ('white', 2),
              ('harpoon', 1), ('Ahab', 1)]


def check_word(word, number):
    # Single check yielded by test_word_pairs: COUNTER must report
    # `number` occurrences of `word`.
    observed = COUNTER.count_word(word)
    assert_equal(observed, number)


def test_word_pairs():
    # Test generator: walks through the example words in WORD_PAIRS
    # and yields one (check function, word, count) tuple per example.
    # --- deliberately no docstring, so that parameters are visible ---
    for expected_word, expected_count in WORD_PAIRS:
        yield (check_word, expected_word, expected_count)


# nose does
# NOTE(review): illustrative fragment only -- it is cut off after the `if`
# line (no body follows), and it iterates over the function object
# test_word_pairs rather than the generator returned by calling it.
for x, y, z in test_word_pairs:
    if x(y, z):
def test_empty():
    """Empty input works: an empty string has a word count of zero."""
    text = TextBody('')
    # assert_(value, 0) tested the truthiness of `value` and used 0 as the
    # failure message, so a correct count of 0 failed and any non-zero
    # count passed. Compare against the expected value instead.
    assert text.word_number == 0
def test_nasty():
    """Ugly data example works: punctuation-heavy text counts as 22 words."""
    text = TextBody(
        "That #~&%* program still doesn't work!\nI already de-bugged it 3 times, and still numpy.array keeps throwing AttributeErrors. What should I do?"
    )
    # The two-argument assert_ only checked truthiness and used 22 as the
    # failure message; compare the count against the expected value.
    assert text.word_number == 22
 def test_average_words_complex(self):
     """Average word length of the summary is about 4.0 (3 places)."""
     summary = TextBody(MOBYDICK_SUMMARY)
     average = summary.get_average_word_length()
     self.assertAlmostEqual(average, 4.0, places=3)
예제 #16
0
def test_word_number_two():
    """A text made of two space-separated tokens has a word count of two."""
    expected_words = 2
    pair = TextBody("two words")
    assert_equal(pair.word_number, expected_words)
 def test_average_empty(self):
     """Average word length of an empty text is expected to raise TypeError."""
     empty_body = TextBody("")
     with self.assertRaises(TypeError):
         empty_body.get_average_word_length()
def test_smallest():
    """Minimal string works: a one-word text yields a one-element word list."""
    text = TextBody("whale")
    # The two-argument assert_ only checked that text.words was truthy and
    # used the expected list as the failure message; compare the lists.
    assert text.words == ['whale']