Exemplo n.º 1
0
class TestMakeFilename(unittest.TestCase):
    def setUp(self):
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.url = yarl.URL('http://foo.com')
        self.root = 'http://example.com'
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        self.css_validate = 'false'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.filename = None
        self.hashname = None

    def test_make_filename(self):
        self.filename, self.hashname = self.handler._make_filename(self.url)
        self.assertEqual(self.filename, 'foo.com')
        self.assertEqual(self.hashname, '167a0418dd8ce3bf0ef00dfb6195f038')

    def test_make_filename_same_host(self):
        self.filename, self.hashname = self.handler._make_filename(
            yarl.URL(self.root))
        self.assertEqual(self.filename, '/index.html')
        self.assertEqual(self.hashname, 'd1546d731a9f30cc80127d57142a482b')

    def test_make_filename_relative(self):
        self.url = yarl.URL('/images')
        self.filename, self.hashname = self.handler._make_filename(self.url)
        self.assertEqual(self.filename, '/images')
        self.assertEqual(self.hashname, '41389bcf7f7427468d8c8675db2d4f98')

    def tearDown(self):
        shutil.rmtree(self.main_page_path)
Exemplo n.º 2
0
class TestGetBody(unittest.TestCase):
    def setUp(self):
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.root = "http://example.com"
        self.level = 0
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        self.css_validate = "false"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.target_path = "/opt/snare/pages/{}".format(yarl.URL(self.root).host)
        self.return_content = None
        self.expected_content = None
        self.filename = None
        self.hashname = None
        self.url = None
        self.content = None
        self.return_url = None
        self.return_level = None
        self.meta = None
        self.q_size = None

        self.session = aiohttp.ClientSession
        self.session.get = AsyncMock(
            return_value=aiohttp.ClientResponse(
                url=yarl.URL("http://www.example.com"),
                method="GET",
                writer=None,
                continue100=1,
                timer=None,
                request_info=None,
                traces=None,
                loop=self.loop,
                session=None,
            )
        )

    def test_get_body(self):
        self.content = b"""<html><body><a href="http://example.com/test"></a></body></html>"""

        aiohttp.ClientResponse._headers = {"Content-Type": "text/html"}
        aiohttp.ClientResponse.read = AsyncMock(return_value=self.content)
        self.filename, self.hashname = self.handler._make_filename(yarl.URL(self.root))
        self.expected_content = '<html><body><a href="/test"></a></body></html>'

        self.meta = {
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/html"}],
            },
            "/test": {
                "hash": "4539330648b80f94ef3bf911f6d77ac9",
                "headers": [{"Content-Type": "text/html"}],
            },
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        with self.assertLogs(level="DEBUG") as log:
            self.loop.run_until_complete(test())
            self.assertIn("DEBUG:snare.cloner:Cloned file: /test", "".join(log.output))

        with open(os.path.join(self.target_path, self.hashname)) as f:
            self.return_content = f.read()

        self.assertEqual(self.return_content, self.expected_content)
        self.assertEqual(
            self.handler.visited_urls[-2:],
            ["http://example.com/", "http://example.com/test"],
        )
        self.assertEqual(self.handler.meta, self.meta)

    def test_get_body_css_validate(self):
        aiohttp.ClientResponse._headers = {"Content-Type": "text/css"}

        self.css_validate = "true"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = b""".banner { background: url("/example.png") }"""
        aiohttp.ClientResponse.read = AsyncMock(return_value=self.content)
        self.expected_content = "http://example.com/example.png"
        self.return_size = 0
        self.meta = {
            "/example.png": {
                "hash": "5a64beebcd2a6f1cbd00b8370debaa72",
                "headers": [{"Content-Type": "text/css"}],
            },
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/css"}],
            },
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        self.loop.run_until_complete(test())
        self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
        self.assertEqual(self.q_size, self.return_size)
        self.assertEqual(self.meta, self.handler.meta)

    def test_get_body_css_validate_scheme(self):
        aiohttp.ClientResponse._headers = {"Content-Type": "text/css"}

        self.css_validate = "true"
        self.return_size = 0
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = [
            b""".banner { background: url("data://domain/test.txt") }""",
            b""".banner { background: url("file://domain/test.txt") }""",
        ]
        self.meta = {
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/css"}],
            },
        }

        self.expected_content = "http://example.com/"

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        for content in self.content:
            aiohttp.ClientResponse.read = AsyncMock(return_value=content)
            self.loop.run_until_complete(test())
            self.assertEqual(self.return_size, self.q_size)
            self.assertEqual(self.handler.meta, self.meta)
            self.assertEqual(self.handler.visited_urls[-1], self.expected_content)

    def test_client_error(self):
        self.session.get = AsyncMock(side_effect=aiohttp.ClientError)

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        with self.assertLogs(level="ERROR") as log:
            self.loop.run_until_complete(test())
            self.assertIn("ERROR:snare.cloner:", "".join(log.output))

    def tearDown(self):
        shutil.rmtree(self.main_page_path)
Exemplo n.º 3
0
class TestGetBody(unittest.TestCase):
    def setUp(self):
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.root = 'http://example.com'
        self.level = 0
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        self.css_validate = 'false'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.target_path = '/opt/snare/pages/{}'.format(
            yarl.URL(self.root).host)
        self.return_content = None
        self.expected_content = None
        self.filename = None
        self.hashname = None
        self.url = None
        self.content = None
        self.return_url = None
        self.return_level = None
        self.meta = None
        self.q_size = None

        self.session = aiohttp.ClientSession
        self.session.get = AsyncMock(return_value=aiohttp.ClientResponse(
            url=yarl.URL("http://www.example.com"),
            method="GET",
            writer=None,
            continue100=1,
            timer=None,
            request_info=None,
            traces=None,
            loop=self.loop,
            session=None))

    def test_get_body(self):
        self.content = b'''<html><body><a href="http://example.com/test"></a></body></html>'''

        aiohttp.ClientResponse._headers = {'Content-Type': 'text/html'}
        aiohttp.ClientResponse.read = AsyncMock(return_value=self.content)
        self.filename, self.hashname = self.handler._make_filename(
            yarl.URL(self.root))
        self.expected_content = '<html><body><a href="/test"></a></body></html>'

        self.meta = {
            '/index.html': {
                'content_type': 'text/html',
                'hash': 'd1546d731a9f30cc80127d57142a482b'
            },
            '/test': {
                'content_type': 'text/html',
                'hash': '4539330648b80f94ef3bf911f6d77ac9'
            }
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        self.loop.run_until_complete(test())

        with open(os.path.join(self.target_path, self.hashname)) as f:
            self.return_content = f.read()

        self.assertEqual(self.return_content, self.expected_content)
        self.assertEqual(self.handler.visited_urls[-2:],
                         ['http://example.com/', 'http://example.com/test'])
        self.assertEqual(self.handler.meta, self.meta)

    def test_get_body_css_validate(self):
        aiohttp.ClientResponse._headers = {'Content-Type': 'text/css'}

        self.css_validate = 'true'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = b'''.banner { background: url("/example.png") }'''
        aiohttp.ClientResponse.read = AsyncMock(return_value=self.content)
        self.expected_content = 'http://example.com/example.png'
        self.return_size = 0
        self.meta = {
            '/example.png': {
                'content_type': 'text/css',
                'hash': '5a64beebcd2a6f1cbd00b8370debaa72'
            },
            '/index.html': {
                'content_type': 'text/css',
                'hash': 'd1546d731a9f30cc80127d57142a482b'
            }
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        self.loop.run_until_complete(test())
        self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
        self.assertEqual(self.q_size, self.return_size)
        self.assertEqual(self.meta, self.handler.meta)

    def test_get_body_css_validate_scheme(self):
        aiohttp.ClientResponse._headers = {'Content-Type': 'text/css'}

        self.css_validate = 'true'
        self.return_size = 0
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)

        self.content = [
            b'''.banner { background: url("data://domain/test.txt") }''',
            b'''.banner { background: url("file://domain/test.txt") }'''
        ]

        self.meta = {
            '/index.html': {
                'content_type': 'text/css',
                'hash': 'd1546d731a9f30cc80127d57142a482b'
            }
        }
        self.expected_content = 'http://example.com/'

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        for content in self.content:
            aiohttp.ClientResponse.read = AsyncMock(return_value=content)
            self.loop.run_until_complete(test())
            self.assertEqual(self.return_size, self.q_size)
            self.assertEqual(self.handler.meta, self.meta)
            self.assertEqual(self.handler.visited_urls[-1],
                             self.expected_content)

    def test_client_error(self):
        self.session.get = AsyncMock(side_effect=aiohttp.ClientError)

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        with self.assertLogs(level='ERROR') as log:
            self.loop.run_until_complete(test())
            self.assertIn('ERROR:snare.cloner:', log.output[0])

    def tearDown(self):
        shutil.rmtree(self.main_page_path)