def test_make_program_from_patterns_good(self):
    """Search a buffer with a program built by make_program_from_patterns().

    The same expression "a+b" is registered twice: once with default
    KeyOpts and once with fixedString=True.  Exactly one hit is expected
    for each, at bytes 0-6 and 8-11 respectively.
    """
    haystack = b'aaaaabb a+b'

    patterns = [
        ("a+b", ['UTF-8'], lightgrep.KeyOpts()),
        ("a+b", ['UTF-8'], lightgrep.KeyOpts(fixedString=True)),
    ]
    prog_opts = lightgrep.ProgOpts()

    # One expected hit per pattern; keywordIndex follows the pattern order.
    expected = [
        {
            'start': begin,
            'end': stop,
            'keywordIndex': kw_index,
            'pattern': 'a+b',
            'encChain': 'UTF-8'
        }
        for kw_index, (begin, stop) in enumerate([(0, 6), (8, 11)])
    ]

    with lightgrep.make_program_from_patterns(patterns, prog_opts) as prog, \
            lightgrep.Context(prog, lightgrep.CtxOpts()) as ctx:
        collector = lightgrep.HitAccumulator()
        ctx.searchBuffer(haystack, collector)
        self.assertEqual(collector.Hits, expected)
def test_count_three(self):
    """Adding three patterns to the program makes count() report 3."""
    # (expression, fourth add_pattern argument) for each pattern, in order.
    additions = (("a+b", 42), ("foo", 1), (".+", 75))

    for expr, tag in additions:
        self.pat.parse(expr, lightgrep.KeyOpts())
        self.fsm.add_pattern(self.prog, self.pat, 'UTF-8', tag)

    self.assertEqual(self.prog.count(), 3)
def test_size(self):
    """A program compiled from a single pattern reports a positive size()."""
    with lightgrep.Program(0) as prog:
        with lightgrep.Pattern() as pat:
            with lightgrep.Fsm(0) as fsm:
                pat.parse("a+b", lightgrep.KeyOpts())
                fsm.add_pattern(prog, pat, 'UTF-8', 42)
                prog.compile(fsm, lightgrep.ProgOpts())

        # assertGreater reports the actual size on failure, whereas
        # assertTrue(size > 0) would only say "False is not true".
        self.assertGreater(prog.size(), 0)
def setUp(self):
    """Create a compiled one-pattern program and a context bound to it.

    Both objects are stored on the test instance (self.prog, self.ctx) and
    their close() methods are registered as cleanups right after creation.
    """
    program = lightgrep.Program(0)
    self.prog = program
    # Registered before anything else can fail so the program is closed
    # even when the rest of setUp raises.
    self.addCleanup(program.close)

    with lightgrep.Pattern() as pat, lightgrep.Fsm(0) as fsm:
        pat.parse("a+b", lightgrep.KeyOpts())
        fsm.add_pattern(program, pat, 'UTF-8', 42)
        program.compile(fsm, lightgrep.ProgOpts())

    context = lightgrep.Context(program, lightgrep.CtxOpts())
    self.ctx = context
    # Cleanups run last-in-first-out, so the context is closed before the
    # program it was created from.
    self.addCleanup(context.close)
def test_write_read(self):
    """A program rebuilt from write() output matches the original.

    The compiled program is serialized with write(), a second Program is
    constructed from that buffer, and both must agree on count() and size().
    """
    with lightgrep.Program(0) as original:
        with lightgrep.Pattern() as pat, lightgrep.Fsm(0) as fsm:
            pat.parse("a+b", lightgrep.KeyOpts())
            fsm.add_pattern(original, pat, 'UTF-8', 42)
            original.compile(fsm, lightgrep.ProgOpts())

        serialized = original.write()

        # The original stays open here so both programs can be queried.
        with lightgrep.Program(serialized) as restored:
            self.assertEqual(restored.count(), original.count())
            self.assertEqual(restored.size(), original.size())
def test_flags(self):
    """KeyOpts accessors reflect the flags passed to the constructor.

    Every combination of fixedString, caseInsensitive and unicodeMode is
    checked in its own sub-test.
    """
    for fixed_string in (False, True):
        for case_insensitive in (False, True):
            for unicode_mode in (False, True):
                # All three flags are passed to subTest so that a failure
                # report identifies the exact combination; previously
                # unicode_mode was left out, making the two sub-tests for
                # each (fixed_string, case_insensitive) pair look the same.
                with self.subTest(fixed_string=fixed_string,
                                  case_insensitive=case_insensitive,
                                  unicode_mode=unicode_mode):
                    opts = lightgrep.KeyOpts(
                        fixedString=fixed_string,
                        caseInsensitive=case_insensitive,
                        unicodeMode=unicode_mode)

                    self.assertEqual(opts.isFixed(), fixed_string)
                    # The accessor is phrased as the negation of the
                    # constructor keyword.
                    self.assertEqual(opts.isCaseSensitive(),
                                     not case_insensitive)
                    self.assertEqual(opts.isUnicodeMode(), unicode_mode)
import lightgrep as lg

# A `with` statement releases lightgrep resources as soon as its block ends.
# Lightgrep initialization is relatively heavyweight, so when there are many
# files or strings to search it is better to loop over them inside a single
# lightgrep `with` block.

searchString = "hello, World O'Sullivan, please don't bl0w up Nain s\\09-123/12-002 s\\EU-12-23 s\\AU-13-059 "
searchData = searchString.encode('utf-8')

testString = "hello, World"
testData = testString.encode('utf-8')

print(f"searchString: {searchString}")

# Each key is a tuple of (pattern, list of encodings, key options).
keys = [
    (
        "hello",
        ["UTF-8", "ISO-8859-1"],
        lg.KeyOpts(fixedString=True, caseInsensitive=False),
    ),
    (
        "world",
        ["UTF-8"],
        lg.KeyOpts(fixedString=True, caseInsensitive=True),
    ),
    # Intended to hit "bl0w" in searchString.
    (
        "bl\\dw",
        ["UTF-8"],
        lg.KeyOpts(fixedString=False, caseInsensitive=True),
    ),
    (
        "[^a-z]+",
        ["UTF-8"],
        lg.KeyOpts(fixedString=False, caseInsensitive=True),
    ),
    # A literal backslash has to be escaped twice: once for the Python
    # string literal and once more for Lightgrep's pattern syntax.
    (
        "s\\\\((A|E)U\\-)?\\d{1,3}-\\d{1,4}[^a-zA-Z0-9]",
        ["UTF-8"],
        lg.KeyOpts(fixedString=False, caseInsensitive=False),
    ),
    # A raw string avoids the Python level of escaping.
    (
        r'\\[\d-]+',
        ["UTF-8"],
        lg.KeyOpts(fixedString=False, caseInsensitive=False),
    ),
]

# First demonstration: open a Lightgrep object in a `with` statement and
# search with it, passing the keys and the callback at init.
print("============================")
print("Results using 'with'")
withHits = lg.HitAccumulator()
def test_add_pattern_good(self):
    """add_pattern() returns 0 for the first pattern added."""
    self.pat.parse("a+b", lightgrep.KeyOpts())
    returned = self.fsm.add_pattern(self.prog, self.pat, 'UTF-8', 42)
    # The returned value is 0 even though 42 was passed as the last argument.
    self.assertEqual(returned, 0)
def test_add_pattern_bad_args(self):
    """Fuzz add_pattern(): each argument replaced by a bad value must raise."""
    self.pat.parse("a+b", lightgrep.KeyOpts())

    # A valid call signature; fuzz_it swaps in each substitute at each
    # position, one at a time.
    fuzz_it(
        self,
        self.fsm.add_pattern,
        arglist=[self.prog, self.pat, 'UTF-8', 42],
        subs=(None, 'bogus'),
    )
def test_add_pattern_closed_pat(self):
    """add_pattern() raises RuntimeError when given a closed Pattern."""
    closed_pat = self.pat
    closed_pat.parse("a+b", lightgrep.KeyOpts())
    closed_pat.close()

    self.assertRaises(
        RuntimeError,
        self.fsm.add_pattern, self.prog, closed_pat, 'UTF-8', 42)
def test_parse_bad_args(self):
    """Fuzz Pattern.parse(): each argument replaced by a bad value must raise."""
    fuzz_it(
        self,
        self.pat.parse,
        arglist=["a", lightgrep.KeyOpts()],
        subs=(None, '*'),
    )
def test_parse_closed(self):
    """parse() on a Pattern that was already closed raises RuntimeError."""
    self.pat.close()
    self.assertRaises(
        RuntimeError, self.pat.parse, "a", lightgrep.KeyOpts())
def test_parse_bad(self):
    """Parsing the expression "+" raises RuntimeError."""
    bad_expr = "+"
    with self.assertRaises(RuntimeError):
        self.pat.parse(bad_expr, lightgrep.KeyOpts())
def test_pattern_good(self):
    """Parsing "a+b" with default KeyOpts completes without raising."""
    default_opts = lightgrep.KeyOpts()
    self.pat.parse("a+b", default_opts)
#!/usr/bin/python3

import array
import ctypes
import mmap
import unittest

import lightgrep


# Two keys sharing one expression and encoding list: the first is
# case-insensitive, the second is a fixed string.
PATLIST = [
    ("a+b", ['UTF-8', 'iso-8859-1'], lightgrep.KeyOpts(caseInsensitive=True)),
    ("a+b", ['UTF-8', 'iso-8859-1'], lightgrep.KeyOpts(fixedString=True)),
]


def fuzz_args(arglist, subs):
    """Yield variants of *arglist* with one argument replaced at a time.

    For every position in *arglist* and every value in *subs*, one list is
    produced that equals *arglist* except that the element at that position
    is the substitute.  Positions vary slowest, substitutes fastest.

    Each yielded list is a fresh object.  Consumers may hold on to it (for
    example unittest's subTest, which renders its parameters only when the
    failure is reported) without a later substitution changing it.
    """
    for i in range(len(arglist)):
        for s in subs:
            args = arglist.copy()
            args[i] = s
            yield args


def fuzz_it(testobj, func, arglist, subs):
    """Assert that *func* raises for every fuzzed variant of *arglist*.

    Each variant produced by fuzz_args(arglist, subs) is run in its own
    sub-test of *testobj*; the call func(*args) must raise some Exception.
    """
    for args in fuzz_args(arglist, subs):
        with testobj.subTest(args=args):
            # Any exception type is accepted: the point is only that bad
            # arguments are rejected rather than silently accepted.
            with testobj.assertRaises(Exception):
                func(*args)


class PointerTests(unittest.TestCase):
    """Test case class; the individual tests are the methods that follow."""