class HeaderwriterPlugin(ScannerPlugin):
    r"""Writes custom log based on suspect filter rules.

    eg. if you put this into headerwriter.regex:

        From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}

    fuglu would write a log with fuglu-id's whose from-domain is
    microsoft.com, yahoo.com or gmail.com
    """

    def __init__(self, config, section=None):
        """Declare the plugin's config options; the filter is built lazily.

        Args:
            config: configuration object, forwarded to ScannerPlugin.
            section: configuration section name, forwarded to ScannerPlugin.
        """
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'filterfile': {
                'default': '/etc/fuglu/headerwriter.regex',
                'description': 'Suspectfilter File',
            },
            'outputfile': {
                'default': '',
                'description': 'Output File',
            },
            'defaultlinetemplate': {
                'default': '${fieldname}: ${matchedvalue}',
                'description': 'Default line output template if nothing is specified in filter config',
            },
        }
        # SuspectFilter instance, created on the first examine() call.
        self.filter = None

    def examine(self, suspect):
        """Append one line to the output file per filter rule hit.

        For each hit the rule's own argument is used as the line template;
        when the rule has none, the 'defaultlinetemplate' option is used.

        Args:
            suspect: the message being scanned; passed to the filter and to
                apply_template.

        Returns:
            DUNNO in every case: no hits, no output file configured, or
            after the lines have been written.
        """
        if self.filter is None:
            self.filter = SuspectFilter(
                self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if not hits:
            return DUNNO

        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        # The context manager closes the file even if templating or a
        # write raises part way through the hits.
        with open(ofile, 'a') as fh:
            for (fieldname, matchedvalue, arg, regex) in hits:
                if arg is None or arg == '':
                    # Looked up only when needed, as before.
                    arg = self.config.get(self.section, 'defaultlinetemplate')
                addvalues = dict(
                    fieldname=fieldname,
                    matchedvalue=matchedvalue,
                    regex=regex,
                )
                outputline = apply_template(arg, suspect, addvalues)
                fh.write(outputline)
                fh.write('\n')

        # Explicit result, matching the early exits above (this path used to
        # fall off the end of the method and return None).
        return DUNNO

    def lint(self):
        """Check the configuration and print any problem found.

        Returns:
            False if the configured filter file does not exist or no output
            file is configured, True otherwise.
        """
        filterfile = self.config.get(self.section, 'filterfile')
        if not os.path.exists(filterfile):
            # Single-argument parenthesised print: same output as the
            # Python 2 print statement, and also valid Python 3.
            print("file not found: %s" % filterfile)
            return False

        if self.config.get(self.section, 'outputfile').strip() == '':
            print("No outputfile configured")
            return False

        return True
class HeaderwriterPlugin(ScannerPlugin):
    r"""Writes custom log based on suspect filter rules.

    eg. if you put this into headerwriter.regex:

        From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}

    fuglu would write a log with fuglu-id's whose from-domain is
    microsoft.com, yahoo.com or gmail.com
    """

    def __init__(self, config, section=None):
        """Declare the plugin's config options; the filter is built lazily.

        Args:
            config: configuration object, forwarded to ScannerPlugin.
            section: configuration section name, forwarded to ScannerPlugin.
        """
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'filterfile': {
                'default': '/etc/fuglu/headerwriter.regex',
                'description': 'Suspectfilter File',
            },
            'outputfile': {
                'default': '',
                'description': 'Output File',
            },
            'defaultlinetemplate': {
                'default': '${fieldname}: ${matchedvalue}',
                'description': 'Default line output template if nothing is specified in filter config',
            },
        }
        # SuspectFilter instance, created on the first examine() call.
        self.filter = None

    def examine(self, suspect):
        """Append one line to the output file per filter rule hit.

        For each hit the rule's own argument is used as the line template;
        when the rule has none, the 'defaultlinetemplate' option is used.

        Args:
            suspect: the message being scanned; passed to the filter and to
                apply_template.

        Returns:
            DUNNO in every case: no hits, no output file configured, or
            after the lines have been written.
        """
        if self.filter is None:
            self.filter = SuspectFilter(
                self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if not hits:
            return DUNNO

        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        # The context manager closes the file even if templating or a
        # write raises part way through the hits.
        with open(ofile, 'a') as fh:
            for (fieldname, matchedvalue, arg, regex) in hits:
                if arg is None or arg == '':
                    # Looked up only when needed, as before.
                    arg = self.config.get(self.section, 'defaultlinetemplate')
                addvalues = dict(
                    fieldname=fieldname,
                    matchedvalue=matchedvalue,
                    regex=regex,
                )
                outputline = apply_template(arg, suspect, addvalues)
                fh.write(outputline)
                fh.write('\n')

        # Explicit result, matching the early exits above (this path used to
        # fall off the end of the method and return None).
        return DUNNO

    def lint(self):
        """Check the configuration and print any problem found.

        Returns:
            False if the configured filter file does not exist or no output
            file is configured, True otherwise.
        """
        filterfile = self.config.get(self.section, 'filterfile')
        if not os.path.exists(filterfile):
            # Single-argument parenthesised print: same output as the
            # Python 2 print statement, and also valid Python 3.
            print("file not found: %s" % filterfile)
            return False

        if self.config.get(self.section, 'outputfile').strip() == '':
            print("No outputfile configured")
            return False

        return True
class SuspectFilterTestCase(unittest.TestCase):
    """Tests for SuspectFilter: rule arguments, extended matches, field
    extraction and HTML stripping."""

    def setUp(self):
        """Load the filter under test from the headertest.regex fixture."""
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        # NOTE(review): the envelope addresses below and the
        # '[email protected]' text in the expected argument look like redacted
        # placeholders rather than real fixture values - confirm against
        # headertest.regex and restore them.
        suspect = Suspect(
            '*****@*****.**', '*****@*****.**',
            TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)

        self.assertTrue('Sent to unittest domain!' in headermatches,
                        "To_domain not found in headercheck")
        self.assertTrue('Envelope sender is [email protected]' in headermatches,
                        "Envelope Sender not matched in header chekc")
        self.assertTrue('Mime Version is 1.0' in headermatches,
                        "Standard header Mime Version not found")
        self.assertTrue('A tag match' in headermatches,
                        "Tag match did not work")
        self.assertTrue('Globbing works' in headermatches,
                        "header globbing failed")
        self.assertTrue('body rule works' in headermatches,
                        "decoded body rule failed")
        self.assertTrue('full body rule works' in headermatches,
                        "full body failed")
        self.assertTrue('mime rule works' in headermatches,
                        "mime rule failed")
        self.assertFalse('this should not match in a body rule' in headermatches,
                         'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertTrue('perl-style /-notation works!' in headermatches,
                        "new rule format failed: %s" % headermatches)
        self.assertTrue('perl-style recipient match' in headermatches,
                        "new rule format failed for to_domain: %s" % headermatches)
        self.assertFalse('this should not match' in headermatches,
                         "rule flag ignorecase was not detected")
        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""
        # NOTE(review): the recipient literal has the domain '*****.**', yet
        # the assertions below expect to_domain == 'unittests.fuglu.org'.
        # The address looks redacted - confirm and restore the real one.
        suspect = Suspect(
            '*****@*****.**', '*****@*****.**',
            TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')

        field, matchedvalue, arg, regex = info
        # assertEqual reports both values on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(field, 'to_domain')
        self.assertEqual(matchedvalue, 'unittests.fuglu.org')
        self.assertEqual(arg, 'Sent to unittest domain!')
        # Raw string: same value as before, without relying on '\.' being
        # an unrecognised escape sequence.
        self.assertEqual(regex, r'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect(
            '*****@*****.**', '*****@*****.**',
            TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        """Test strip_text on three HTML samples, with use_bfs on and off."""
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de"> <head> <title>greetings</title> </head> <body> <font color="red">well met!</font> </body> </html> """

        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml" xmlns:o=3D"urn:schemas-microsoft-com:office:office" xmlns:w=3D"urn:schemas-microsoft-com:office:word" xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml" xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15 (filtered medium)"><style><!-- /* Font Definitions */ @font-face {font-family:"Cambria Math"; panose-1:2 4 5 3 5 4 6 3 2 4;} @font-face {font-family:Calibri; panose-1:2 15 5 2 2 2 4 3 2 4;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal {margin:0cm; margin-bottom:.0001pt; font-size:11.0pt; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} a:link, span.MsoHyperlink {mso-style-priority:99; color:#0563C1; text-decoration:underline;} a:visited, span.MsoHyperlinkFollowed {mso-style-priority:99; color:#954F72; text-decoration:underline;} span.E-MailFormatvorlage17 {mso-style-type:personal-compose; font-family:"Calibri",sans-serif; color:windowtext;} .MsoChpDefault {mso-style-type:export-only; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} @page WordSection1 {size:612.0pt 792.0pt; margin:70.85pt 70.85pt 2.0cm 70.85pt;} div.WordSection1 {page:WordSection1;} --></style><!--[if gte mso 9]><xml> <o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" /> </xml><![endif]--><!--[if gte mso 9]><xml> <o:shapelayout v:ext=3D"edit"> <o:idmap v:ext=3D"edit" data=3D"1" /> </o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(
                declarationtest, use_bfs=use_bfs)
            self.assertEqual(
                docstripped.split(), ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(
                wordhtml, use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')
class SuspectFilterTestCase(unittest.TestCase):
    """Tests for SuspectFilter: rule arguments, extended matches, field
    extraction and HTML stripping."""

    def setUp(self):
        """Load the filter under test from the headertest.regex fixture."""
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        # NOTE(review): the envelope addresses below and the
        # '[email protected]' text in the expected argument look like redacted
        # placeholders rather than real fixture values - confirm against
        # headertest.regex and restore them.
        suspect = Suspect(
            '*****@*****.**', '*****@*****.**',
            TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)

        self.assertTrue('Sent to unittest domain!' in headermatches,
                        "To_domain not found in headercheck")
        self.assertTrue('Envelope sender is [email protected]' in headermatches,
                        "Envelope Sender not matched in header chekc")
        self.assertTrue('Mime Version is 1.0' in headermatches,
                        "Standard header Mime Version not found")
        self.assertTrue('A tag match' in headermatches,
                        "Tag match did not work")
        self.assertTrue('Globbing works' in headermatches,
                        "header globbing failed")
        self.assertTrue('body rule works' in headermatches,
                        "decoded body rule failed")
        self.assertTrue('full body rule works' in headermatches,
                        "full body failed")
        self.assertTrue('mime rule works' in headermatches,
                        "mime rule failed")
        self.assertFalse('this should not match in a body rule' in headermatches,
                         'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertTrue('perl-style /-notation works!' in headermatches,
                        "new rule format failed: %s" % headermatches)
        self.assertTrue('perl-style recipient match' in headermatches,
                        "new rule format failed for to_domain: %s" % headermatches)
        self.assertFalse('this should not match' in headermatches,
                         "rule flag ignorecase was not detected")
        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""
        # NOTE(review): the recipient literal has the domain '*****.**', yet
        # the assertions below expect to_domain == 'unittests.fuglu.org'.
        # The address looks redacted - confirm and restore the real one.
        suspect = Suspect(
            '*****@*****.**', '*****@*****.**',
            TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')

        field, matchedvalue, arg, regex = info
        # assertEqual reports both values on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(field, 'to_domain')
        self.assertEqual(matchedvalue, 'unittests.fuglu.org')
        self.assertEqual(arg, 'Sent to unittest domain!')
        # Raw string: same value as before, without relying on '\.' being
        # an unrecognised escape sequence.
        self.assertEqual(regex, r'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect(
            '*****@*****.**', '*****@*****.**',
            TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        """Test strip_text on three HTML samples, with use_bfs on and off."""
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de"> <head> <title>greetings</title> </head> <body> <font color="red">well met!</font> </body> </html> """

        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml" xmlns:o=3D"urn:schemas-microsoft-com:office:office" xmlns:w=3D"urn:schemas-microsoft-com:office:word" xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml" xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15 (filtered medium)"><style><!-- /* Font Definitions */ @font-face {font-family:"Cambria Math"; panose-1:2 4 5 3 5 4 6 3 2 4;} @font-face {font-family:Calibri; panose-1:2 15 5 2 2 2 4 3 2 4;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal {margin:0cm; margin-bottom:.0001pt; font-size:11.0pt; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} a:link, span.MsoHyperlink {mso-style-priority:99; color:#0563C1; text-decoration:underline;} a:visited, span.MsoHyperlinkFollowed {mso-style-priority:99; color:#954F72; text-decoration:underline;} span.E-MailFormatvorlage17 {mso-style-type:personal-compose; font-family:"Calibri",sans-serif; color:windowtext;} .MsoChpDefault {mso-style-type:export-only; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} @page WordSection1 {size:612.0pt 792.0pt; margin:70.85pt 70.85pt 2.0cm 70.85pt;} div.WordSection1 {page:WordSection1;} --></style><!--[if gte mso 9]><xml> <o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" /> </xml><![endif]--><!--[if gte mso 9]><xml> <o:shapelayout v:ext=3D"edit"> <o:idmap v:ext=3D"edit" data=3D"1" /> </o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(
                declarationtest, use_bfs=use_bfs)
            self.assertEqual(
                docstripped.split(), ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(
                wordhtml, use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')