Python SuspectFilter.get_args примеры использования

Язык программирования: Python

Пространство имен/Пакет: fuglu.shared

Класс/Тип: SuspectFilter

Метод/Функция: get_args

Примеров на hotexamples.com: 4

Python SuspectFilter.get_args — найдено 4 примера. Это лучшие примеры кода на Python для fuglu.shared.SuspectFilter.get_args, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

SuspectFilter(13)

matches(6)

get_field(3)

get_args(2)

lint(2)

strip_text(2)

Пример #1

Показать файл

Файл: headerwriter.py Проект: danBLA/fuglu-extra-plugins

class HeaderwriterPlugin(ScannerPlugin):
    r"""
    Writes custom log based on suspect filter rules

    eg. if you put this into headerwriter.regex:
    From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}

    fuglu would write a log with fuglu-id's whose from-domain is microsoft.com,yahoo.com or gmail.com
    """
    # NOTE: the docstring is a raw string so that the regex example's "\."
    # is not treated as an (invalid) escape sequence.

    def __init__(self, config, section=None):
        """Declare the plugin's config options; the filter is built lazily."""
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'filterfile': {
                'default': '/etc/fuglu/headerwriter.regex',
                'description': 'Suspectfilter File',
            },
            'outputfile': {
                'default': '',
                'description': 'Output File',
            },
            'defaultlinetemplate': {
                'default':
                '${fieldname}: ${matchedvalue}',
                'description':
                'Default line output template if nothing is specified in filter config',
            }
        }
        # Created on the first call to examine() from the 'filterfile' option.
        self.filter = None

    def examine(self, suspect):
        """Append one templated line per matching filter rule to the output file.

        Returns DUNNO when no rule matches or when no output file is
        configured (the latter is also logged as an error). After writing,
        the method falls through without an explicit return value.
        """
        if self.filter is None:
            self.filter = SuspectFilter(
                self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if not hits:
            return DUNNO

        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        # The context manager closes the file even if templating or
        # writing raises part-way through the hit list.
        with open(ofile, 'a') as fh:
            for fieldname, matchedvalue, arg, regex in hits:
                # Rules without their own template use the configured default.
                if arg is None or arg == '':
                    arg = self.config.get(self.section, 'defaultlinetemplate')

                addvalues = dict(fieldname=fieldname,
                                 matchedvalue=matchedvalue,
                                 regex=regex)
                fh.write(apply_template(arg, suspect, addvalues))
                fh.write('\n')

    def lint(self):
        """Return True if the filter file exists and an output file is set.

        Problems are reported on stdout and make the method return False.
        """
        filterfile = self.config.get(self.section, 'filterfile')
        if not os.path.exists(filterfile):
            print("file not found: %s" % filterfile)
            return False

        if self.config.get(self.section, 'outputfile').strip() == '':
            print("No outputfile configured")
            return False

        return True

Пример #2

Показать файл

Файл: headerwriter.py Проект: gryphius/fuglu-extra-plugins

class HeaderwriterPlugin(ScannerPlugin):
    r"""
    Writes custom log based on suspect filter rules

    eg. if you put this into headerwriter.regex:
    From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}

    fuglu would write a log with fuglu-id's whose from-domain is microsoft.com,yahoo.com or gmail.com
    """
    # NOTE: raw docstring, so the "\." in the regex example is kept verbatim
    # instead of being parsed as an invalid escape sequence.

    def __init__(self, config, section=None):
        """Register the config options; the SuspectFilter is created on demand."""
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'filterfile': {
                'default': '/etc/fuglu/headerwriter.regex',
                'description': 'Suspectfilter File',
            },
            'outputfile': {
                'default': '',
                'description': 'Output File',
            },
            'defaultlinetemplate': {
                'default': '${fieldname}: ${matchedvalue}',
                'description': 'Default line output template if nothing is specified in filter config',
            },
        }
        # Populated by examine() the first time a message is processed.
        self.filter = None

    def examine(self, suspect):
        """Write one line to the output file for each filter rule that matches.

        Returns DUNNO if nothing matched or if 'outputfile' is empty (which
        is additionally logged as an error). When lines were written the
        method ends without an explicit return value.
        """
        if self.filter is None:
            self.filter = SuspectFilter(self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if not hits:
            return DUNNO

        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        # "with" guarantees the handle is closed even when apply_template()
        # or write() raises in the middle of the loop.
        with open(ofile, 'a') as fh:
            for fieldname, matchedvalue, arg, regex in hits:
                # Fall back to the configured template when the rule has none.
                if arg is None or arg == '':
                    arg = self.config.get(self.section, 'defaultlinetemplate')

                addvalues = dict(fieldname=fieldname, matchedvalue=matchedvalue, regex=regex)
                outputline = apply_template(arg, suspect, addvalues)
                fh.write(outputline)
                fh.write('\n')

    def lint(self):
        """Check the configuration: filter file present and output file set.

        Prints a message and returns False on the first problem found,
        otherwise returns True.
        """
        filterfile = self.config.get(self.section, 'filterfile')
        if not os.path.exists(filterfile):
            print("file not found: %s" % filterfile)
            return False

        if self.config.get(self.section, 'outputfile').strip() == '':
            print("No outputfile configured")
            return False

        return True

Пример #3

Показать файл

Файл: shared_test.py Проект: jahlives/fuglu

class SuspectFilterTestCase(unittest.TestCase):
    """Test Suspectfilter

    Every test runs against a SuspectFilter loaded from the
    'headertest.regex' file in TESTDATADIR.
    """
    def setUp(self):
        """Load the filter under test from the test data directory."""
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**',
                          TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)
        # assertIn/assertNotIn report both operands on failure, unlike
        # assertTrue(x in y) which only says "False is not true".
        self.assertIn('Sent to unittest domain!', headermatches,
                      "To_domain not found in headercheck")
        self.assertIn(
            'Envelope sender is [email protected]', headermatches,
            "Envelope Sender not matched in header chekc")
        self.assertIn('Mime Version is 1.0', headermatches,
                      "Standard header Mime Version not found")
        self.assertIn('A tag match', headermatches,
                      "Tag match did not work")
        self.assertIn('Globbing works', headermatches,
                      "header globbing failed")
        self.assertIn('body rule works', headermatches,
                      "decoded body rule failed")
        self.assertIn('full body rule works', headermatches,
                      "full body failed")
        self.assertIn('mime rule works', headermatches, "mime rule failed")
        self.assertNotIn(
            'this should not match in a body rule', headermatches,
            'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertIn('perl-style /-notation works!', headermatches,
                      "new rule format failed: %s" % headermatches)
        self.assertIn(
            'perl-style recipient match', headermatches,
            "new rule format failed for to_domain: %s" % headermatches)
        self.assertNotIn('this should not match', headermatches,
                         "rule flag ignorecase was not detected")

        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""

        suspect = Suspect('*****@*****.**',
                          '*****@*****.**',
                          TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')
        field, matchedvalue, arg, regex = info
        self.assertEqual(field, 'to_domain')
        self.assertEqual(matchedvalue, 'unittests.fuglu.org')
        self.assertEqual(arg, 'Sent to unittest domain!')
        # Raw string: same value as before, without the invalid "\." escape.
        self.assertEqual(regex, r'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**',
                          TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        """Check strip_text output for three HTML inputs, with and without use_bfs."""
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
  <head>
    <title>greetings</title>
  </head>
  <body>
    <font color="red">well met!</font>
  </body>
</html>
"""
        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml"
xmlns:o=3D"urn:schemas-microsoft-com:office:office"
xmlns:w=3D"urn:schemas-microsoft-com:office:word"
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META
HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html;
charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15
(filtered medium)"><style><!--
/* Font Definitions */
@font-face
	{font-family:"Cambria Math";
	panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
	{font-family:Calibri;
	panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
	{margin:0cm;
	margin-bottom:.0001pt;
	font-size:11.0pt;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
	{mso-style-priority:99;
	color:#0563C1;
	text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
	{mso-style-priority:99;
	color:#954F72;
	text-decoration:underline;}
span.E-MailFormatvorlage17
	{mso-style-type:personal-compose;
	font-family:"Calibri",sans-serif;
	color:windowtext;}
.MsoChpDefault
	{mso-style-type:export-only;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
@page WordSection1
	{size:612.0pt 792.0pt;
	margin:70.85pt 70.85pt 2.0cm 70.85pt;}
div.WordSection1
	{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH
link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p
class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(declarationtest,
                                                    use_bfs=use_bfs)
            self.assertEqual(docstripped.split(),
                             ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(wordhtml,
                                                        use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')

Пример #4

Показать файл

Файл: shared_test.py Проект: steigr/fuglu

class SuspectFilterTestCase(unittest.TestCase):

    """Test Header Filter

    All tests use a SuspectFilter built from 'headertest.regex' in
    TESTDATADIR.
    """

    def setUp(self):
        """Create the filter instance exercised by each test."""
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        """No per-test cleanup is required."""
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)
        # Membership assertions show the missing item and the container
        # when they fail, which assertTrue(x in y) does not.
        self.assertIn(
            'Sent to unittest domain!', headermatches, "To_domain not found in headercheck")
        self.assertIn('Envelope sender is [email protected]', headermatches,
                      "Envelope Sender not matched in header chekc")
        self.assertIn('Mime Version is 1.0', headermatches,
                      "Standard header Mime Version not found")
        self.assertIn(
            'A tag match', headermatches, "Tag match did not work")
        self.assertIn(
            'Globbing works', headermatches, "header globbing failed")
        self.assertIn(
            'body rule works', headermatches, "decoded body rule failed")
        self.assertIn(
            'full body rule works', headermatches, "full body failed")
        self.assertIn('mime rule works', headermatches, "mime rule failed")
        self.assertNotIn('this should not match in a body rule', headermatches,
                         'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertIn('perl-style /-notation works!', headermatches,
                      "new rule format failed: %s" % headermatches)
        self.assertIn('perl-style recipient match', headermatches,
                      "new rule format failed for to_domain: %s" % headermatches)
        self.assertNotIn('this should not match', headermatches,
                         "rule flag ignorecase was not detected")

        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""

        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')
        field, matchedvalue, arg, regex = info
        self.assertEqual(field, 'to_domain')
        self.assertEqual(matchedvalue, 'unittests.fuglu.org')
        self.assertEqual(arg, 'Sent to unittest domain!')
        # Raw string keeps the backslashes without an invalid-escape warning.
        self.assertEqual(regex, r'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(self.candidate.get_field(
            suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(self.candidate.get_field(
            suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(self.candidate.get_field(
            suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        """Run strip_text on three HTML samples for both use_bfs settings."""
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
  <head>
    <title>greetings</title>
  </head>
  <body>
    <font color="red">well met!</font>
  </body>
</html>
"""
        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml"
xmlns:o=3D"urn:schemas-microsoft-com:office:office"
xmlns:w=3D"urn:schemas-microsoft-com:office:word"
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META
HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html;
charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15
(filtered medium)"><style><!--
/* Font Definitions */
@font-face
	{font-family:"Cambria Math";
	panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
	{font-family:Calibri;
	panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
	{margin:0cm;
	margin-bottom:.0001pt;
	font-size:11.0pt;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
	{mso-style-priority:99;
	color:#0563C1;
	text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
	{mso-style-priority:99;
	color:#954F72;
	text-decoration:underline;}
span.E-MailFormatvorlage17
	{mso-style-type:personal-compose;
	font-family:"Calibri",sans-serif;
	color:windowtext;}
.MsoChpDefault
	{mso-style-type:export-only;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
@page WordSection1
	{size:612.0pt 792.0pt;
	margin:70.85pt 70.85pt 2.0cm 70.85pt;}
div.WordSection1
	{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH
link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p
class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(
                declarationtest, use_bfs=use_bfs)
            self.assertEqual(
                docstripped.split(), ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(
                wordhtml, use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')