/
testhighlight.py
executable file
·129 lines (104 loc) · 4.83 KB
/
testhighlight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python
#
# Author: vaibhav@bhembre.com
# URL: http://highlightdocdemo.appspot.com/
# Runtime: Python v2.6.1
#
# Description:
# This file contains the test cases for the 'highlightdoc' module. Each test case is explained
# where it is defined. For convenience, a fixed sample string is used as the document to search.
import highlightdoc
import re
import unittest
# Sample document every test case searches: five short sentences that share
# the words "fire", "fat", "dog" and "man" in different combinations.
doc = (
    "There is a fire man. There is a fire dog. There is a fat dog. "
    "There is a fire boy. There is a fat man."
)
class TestHighlight(unittest.TestCase):
    """Test cases for highlightdoc.highlight_doc, run against the sample 'doc' string.

    Every check uses a unittest assertion method rather than a bare 'assert'
    statement, so the checks still run when Python is started with -O.
    """

    def setUp(self):
        """Set up a new instance of the helper class before running every test."""
        self.doc = doc
        self.h = Helper()

    def test_snippetsize(self):
        """Verify that the snippet is at least as long as the query."""
        query = "boy"
        outstr = "There is a fire " + self.h.tagQuery(query) + "."
        teststr = self.h.testQuery(query)
        self.h.outTest("Snippet-size", query, outstr, teststr)
        self.assertEqual(outstr, teststr)
        self.assertTrue(len(teststr) >= len(query),
                        "Snippet size less than query size test fails.")

    def test_case_insensitivity(self):
        """Check that queries are matched in a case-insensitive manner.

        The query is capitalised ("Fat") while the expected snippet tags the
        lower-case text ("fat") as it appears in the document.
        """
        query = "Fat"
        oritext = "fat"
        outstr = "There is a " + self.h.tagQuery(oritext) + " dog." \
            + self.h.ext + " There is a " + self.h.tagQuery(oritext) + \
            " man."
        teststr = self.h.testQuery(query)
        self.h.outTest("Case-sensitivity", query, outstr, teststr)
        self.assertEqual(outstr, teststr, "Case-sensitivity test fails.")

    def test_multiline(self):
        """Verify that a snippet spanning different parts of the document is marked.

        A relevant snippet can contain two non-adjacent sentences, in which
        case it should contain a triple dot sequence ('...') marking the
        separation.
        """
        query = "fire"
        teststr = self.h.testQuery(query)
        self.h.outTest("Multiple Sentences Display", query, "...", teststr)
        # Three or four consecutive dots: the separator alone, or the
        # separator directly following a sentence-ending full stop.
        self.assertTrue(re.search(r"[\.]{3,4}", teststr) is not None,
                        "Multiple sentence test fails.")

    def test_singleline(self):
        """Verify that a snippet made only of adjacent sentences has no '...'.

        A snippet consisting entirely of a sequence of adjacent sentences
        should not contain a triple dot sequence string ('...').
        """
        query = "dog"
        teststr = self.h.testQuery(query)
        self.h.outTest("Adjacent sentences display", query, "...", teststr)
        self.assertTrue(re.search(r"[\.]{3,4}", teststr) is None,
                        "Adjacent sentences test fails.")

    def test_query_not_success(self):
        """Verify the message returned when the query matches nothing.

        A proper message should be displayed in case the query is unable to
        produce a match within the document.
        """
        query = "a fat clout"
        teststr = self.h.testQuery(query)
        self.h.outTest("Search Not Successful", query, "\"\"", teststr)
        self.assertEqual(teststr, "No match found.",
                         "Query not successful test fails.")

    def test_query_till_last_word(self):
        """Verify that a query is retried with its leading words removed.

        If the original query does not generate a match, try again by
        truncating the initial words of the query and see if the remainder
        generates a result. Keep doing this till the last word. The first
        word is removed because it usually is an adjective or adverb, whereas
        the latter part consists of the noun, so results containing the noun
        are better suited to the intent of the user. However, this is an
        extremely naive way of searching for a match, and superior NLP/IR
        techniques need to be in place to keep the user happy.
        """
        query = "a super fast dog"
        teststr = self.h.testQuery(query)
        self.h.outTest("Query until last word", query, "\"\"", teststr)
        self.assertNotEqual(teststr, "No match found.",
                            "Query till last word test fails.")

    def test_nonstring_input(self):
        """Verify the error message returned when the query is not a string."""
        query = 121
        teststr = self.h.testQuery(query)
        testerr = "Inputs to 'highlight_doc' must be in string format."
        self.h.outTest("Nonstring input", query, "121", teststr)
        self.assertEqual(teststr, testerr, "Nonstring input test fails.")
class Helper(object):
    """Helper used by the test cases to build, run and print test queries.

    Attributes set by __init__:
        wrapTag: base name of the highlight marker ("highlight").
        startWrapTag: opening marker placed before highlighted text.
        endWrapTag: closing marker placed after highlighted text.
        ext: the "..." string the tests insert between sentences when
            composing an expected snippet.
    """

    def __init__(self):
        self.wrapTag = "highlight"
        self.startWrapTag = "[[" + self.wrapTag.upper() + "]]"
        self.endWrapTag = "[[END" + self.wrapTag.upper() + "]]"
        self.ext = "..."

    def tagQuery(self, query):
        """Construct the 'tag-wrapper': query enclosed in the start and end markers."""
        return self.startWrapTag + query + self.endWrapTag

    def testQuery(self, query):
        """Call highlightdoc.highlight_doc and return its result.

        The document is the globally declared 'doc'; the query is passed in
        through the input parameter.
        """
        return highlightdoc.highlight_doc(doc, query)

    def outTest(self, tag, query, outstr, teststr):
        """Print the test name, the query and the two strings being compared.

        tag is the label printed after "Testing: ", query is converted with
        str() before printing, and outstr and teststr are printed on their
        own lines separated by "AND".
        """
        # Each call passes exactly one parenthesised argument, which prints
        # the same text under the Python 2 print statement and the Python 3
        # print function.
        print("\n" + ("-" * 40))
        print("Testing: " + tag)
        print("Query: " + str(query) + "\n")
        print(outstr)
        print("AND")
        print(teststr)
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()