def test(self):
    """Check the lists build_scorecard returns for empty and non-empty input.

    Asserted behavior:
    * a ``None`` or empty first argument gives an empty list, whatever the
      second argument is;
    * a string gives a list with one entry per character, each equal to the
      second argument, except that ``None`` is expected to come back as
      ``0.0``.
    """
    # NOTE(review): another method named ``test`` is visible nearby; if both
    # are defined in the same class the later one replaces this one and these
    # assertions never run -- confirm against the enclosing class.
    from highlighter import build_scorecard

    sample = 'Hello world'
    width = len(sample)

    # Inputs with nothing to score always produce an empty scorecard.
    for nothing, fill in ((None, None), ([], None), (None, 1.0)):
        assert build_scorecard(nothing, fill) == []

    # One slot per character, pre-filled with the requested value.
    for fill, slot in ((None, 0.0), (2.0, 2.0), ('a', 'a')):
        assert build_scorecard(sample, fill) == [slot] * width
def test(self):
    """Check that score_index adds scores into an existing scorecard in place.

    A sentence is tokenized, stemmed and indexed, then scored twice against
    the same scorecard list: first with a scorer that always returns 3.3,
    then with one that returns 1.0 only for arguments longer than one
    character. The expected lists below are grouped by the slice of ``doc``
    they line up with.
    """
    from highlighter import build_scorecard
    from highlighter import build_stem_index
    from highlighter import english_suffix_stemmer as stemmer
    from highlighter import score_index
    from highlighter import tokenize

    def flat_score(x):
        # Same score for every argument.
        return 3.3

    def bonus_if_long(x):
        # Adds nothing for single-character arguments.
        if len(x) > 1:
            return 1.0
        return 0.0

    doc = "This ham sammy is the bomb!"
    splitter = re.compile(r'(\W)')

    words = tokenize(doc, splitter)
    stems_by_word = {word: stemmer(word) for word in words}
    index = build_stem_index(words, stems_by_word)
    card = build_scorecard(doc, 0.0)

    # First pass: every indexed position receives 3.3.
    score_index(index, card, flat_score)
    after_flat = [
        3.3, 3.3, 3.3, 0.0, 0.0,       # "This "
        3.3, 3.3, 3.3, 0.0,            # "ham "
        3.3, 3.3, 3.3, 3.3, 3.3, 0.0,  # "sammy "
        0.0, 0.0, 0.0,                 # "is "
        3.3, 3.3, 3.3, 0.0,            # "the "
        3.3, 3.3, 3.3, 3.3,            # "bomb"
        3.3,                           # "!"
    ]
    assert card == after_flat

    # Second pass accumulates on top of the first; the final position is
    # expected to stay at 3.3.
    score_index(index, card, bonus_if_long)
    after_bonus = [
        4.3, 4.3, 4.3, 0.0, 0.0,       # "This "
        4.3, 4.3, 4.3, 0.0,            # "ham "
        4.3, 4.3, 4.3, 4.3, 4.3, 0.0,  # "sammy "
        0.0, 0.0, 0.0,                 # "is "
        4.3, 4.3, 4.3, 0.0,            # "the "
        4.3, 4.3, 4.3, 4.3,            # "bomb"
        3.3,                           # "!"
    ]
    assert card == after_bonus