def test_performance():
    """
    Smoke-tests calculate_tf on a large, randomly generated text.

    Builds a text of 10000 space-terminated tokens drawn at random from a
    small fixed vocabulary and passes it to text.calculate_tf using the
    English language. Nothing is asserted about the result: the test only
    exercises the call on a big input.
    """
    import random

    vocabulary = ["zero", "one", "two", "three", "four", "five", "six",
                  "seven", "eight", "nine", "R$1000.00"]
    language = 'english'
    word_count = 10000  # increase number and measure time when necessary

    # Assemble the text with a single join rather than repeated "+=", so the
    # setup cost stays linear when word_count is raised for measurements.
    # Every token keeps its trailing space.
    contents = "".join(
        random.choice(vocabulary) + " " for _ in range(word_count)
    )
    text.calculate_tf(language, contents)
def test_calculate_tf_en():
    """
    Checks the term frequencies calculate_tf reports for an English sentence.
    """
    sentence = "Cooks who don't love cooking don't cook well."
    frequencies = text.calculate_tf("english", sentence)

    # (stem, expected count) pairs, checked in this order.
    expected_counts = (
        ('cook', 3),
        ('love', 1),
        ('well', 1),
    )
    for stem, count in expected_counts:
        nose.tools.eq_(frequencies[stem], count, "Wrong TF")
def test_calculate_tf_pt():
    """
    Checks the term frequencies calculate_tf reports for a Portuguese sentence.
    """
    sentence = "Eu não gostava do gosto gasto do gesto de agosto."
    frequencies = text.calculate_tf("portuguese", sentence)

    # (stem, expected count) pairs, checked in this order.
    expected_counts = (
        ('gost', 2),
        ('gast', 1),
        ('gest', 1),
        ('agost', 1),
    )
    for stem, count in expected_counts:
        nose.tools.eq_(frequencies[stem], count, "Wrong TF")
def main(argv):
    """
    Computes term frequencies for a text described by a list of arguments.

    :param argv: sequence whose first item is the vocabulary language and
        whose second item is the text to analyse; any further items are
        ignored.
    :return: ``{"success": True, "results": <term frequencies>}`` when
        text.calculate_tf returns a value, otherwise
        ``{"success": False, "message": <reason>}``.
    """
    if len(argv) < 2:
        msg = "You must specify the language and the text"
        log.error(msg)
        return {"success": False, "message": msg}

    vocabulary_language, text_for_tf = argv[0], argv[1]
    tf_by_term = text.calculate_tf(vocabulary_language, text_for_tf)

    if tf_by_term is None:
        # Always hand callers a result dict: falling off the end here would
        # return a bare None, which breaks any caller reading "success".
        msg = "Could not calculate the term frequencies"
        log.error(msg)
        return {"success": False, "message": msg}

    return {"success": True, "results": tf_by_term}