Пример #1
0
    async def test_content_that_is_not_text_never_match_content_simhash_of_sample(self):
        """Check that Simhash is never called for a response whose body is not valid UTF-8.

        A sample simhash is stored for ``example.com/`` in the knowledge base,
        then a response carrying undecodable bytes is passed to
        ``after_response`` while ``Simhash`` is patched in the module under test.
        """
        raw = b'Invalid UTF8 x\x80Z"'
        response = Response(200, {})
        response.set_content(raw, True)
        # Named sample_simhash (not hash) so the builtin hash() is not shadowed.
        sample_simhash = self.filter._hash_response(StaticResponse(200, {}, "content"))
        self.kb.query_samples["example.com/"] = {"simhash": sample_simhash}

        with patch("tachyon.heuristics.rejectignoredquery.Simhash") as Simhash:
            await self.filter.after_response(Entry.create("http://example.com/?wsdl", response=response))

            Simhash.assert_not_called()
    async def test_content_that_is_not_text_never_match_content_simhash_of_sample(self):
        """Undecodable response content must not lead to any Simhash call.

        The knowledge base holds a sample simhash for ``example.com/``; the
        response under test is given raw bytes that are not valid UTF-8, and
        the patched ``Simhash`` is expected to stay uncalled after
        ``after_response`` has run.
        """
        raw = b'Invalid UTF8 x\x80Z"'
        response = Response(200, {})
        response.set_content(raw, True)
        # Avoid the name "hash" here: it would shadow the builtin function.
        sample_hash = self.filter._hash_response(StaticResponse(200, {}, "content"))
        self.kb.query_samples["example.com/"] = {"simhash": sample_hash}

        with patch("tachyon.heuristics.rejectignoredquery.Simhash") as Simhash:
            await self.filter.after_response(Entry.create("http://example.com/?wsdl", response=response))

            Simhash.assert_not_called()
Пример #3
0
    async def on_request_successful(self, entry):
        """Set ``entry.result.error_behavior`` from the response's content simhash.

        The flag is False when the status code is in ``self.safe_status_codes``
        or when the result carries no content simhash, and True when the
        simhash is closer than ``self.match_threshold`` to a known bad
        behavior. Otherwise the simhash value is appended to
        ``self.behavior_buffer`` and the flag mirrors ``self.error_behavior``,
        which is re-evaluated whenever the buffer has reached
        ``self.max_buffer_size``.
        """
        if entry.response.code in self.safe_status_codes:
            entry.result.error_behavior = False
            return

        result = entry.result
        fingerprint = result.content_simhash
        if fingerprint is None:
            result.error_behavior = False
            return

        # Stop at the first known bad signature that is close enough.
        for known in self.known_bad_behavior:
            if fingerprint.distance(Simhash(known)) < self.match_threshold:
                result.error_behavior = True
                return

        buffer_is_full = len(self.behavior_buffer) >= self.max_buffer_size
        if buffer_is_full:
            # The state is re-evaluated before the oldest value is dropped.
            self.error_behavior = self._is_error_behavior(fingerprint)
            self.behavior_buffer.pop(0)

        if self.error_behavior:
            # Everything still buffered is remembered as bad behavior.
            for buffered_value in self.behavior_buffer:
                self.known_bad_behavior.add(buffered_value)

        self.behavior_buffer.append(fingerprint.value)
        result.error_behavior = self.error_behavior
Пример #4
0
    async def is_valid(self, entry):
        """Return whether ``entry`` still holds up when it is checked again.

        The result is False when the entry's ``error_simhash`` is closer than
        5 to any signature in ``kb.bad_behavior_response``, or when requesting
        the entry's URL again raises anything other than RejectRedirection.
        It is True when the repeated request succeeds or raises
        RejectRedirection.
        """
        value = getattr(entry.result, 'error_simhash', None)
        if value is not None:
            # Revalidate the responses with the known bad behavior signatures.
            current = Simhash(value)
            if any(current.distance(Simhash(k)) < 5 for k in self.hammertime.kb.bad_behavior_response):
                return False

        try:
            await self.hammertime.request(entry.request.url, arguments=entry.arguments)
        except RejectRedirection:
            # This is most likely the home path check as the result would never reach revalidation otherwise
            return True
        except Exception:
            # Any other failure of the repeated request marks the entry as invalid.
            return False
        return True
    async def test_on_request_successful_pop_first_result_when_buffer_is_full(
            self):
        """Check that the oldest buffered value is dropped when a new result arrives.

        The buffer is pre-filled with the integers 0 to 9; after one
        successful request it is expected to hold 1 to 9 followed by the
        simhash value of "data".
        """
        # extend() accepts any iterable, so the range is passed directly
        # instead of being copied into a throwaway list first.
        self.behavior_detection.behavior_buffer.extend(range(10))

        await self.runner.perform_ok(self.entry)

        self.assertEqual(self.behavior_detection.behavior_buffer,
                         [1, 2, 3, 4, 5, 6, 7, 8, 9,
                          Simhash("data").value])
    async def test_after_response_reject_request_if_simhash_of_response_content_equals_sample_simhash(self):
        """Check that responses matching the stored sample simhash are rejected.

        Both a response with the exact sample content and one with a slightly
        different content ("response-content") must raise RejectRequest.
        """
        sample_value = Simhash("response content").value
        self.kb.query_samples["example.com/"] = {"simhash": sample_value}
        identical = Entry.create("http://example.com/?wsdl",
                                 response=StaticResponse(200, {}, "response content"))
        near_identical = Entry.create("http://example.com/?wsdl",
                                      response=StaticResponse(200, {}, "response-content"))

        for candidate in (identical, near_identical):
            with self.assertRaises(RejectRequest):
                await self.filter.after_response(candidate)
 async def test_homepage_do_not_count_as_soft_404(self):
     """Check that a request for the site root is not rejected.

     A signature built from "response content" is stored for every pattern
     in ``self.patterns``; the entry for ``http://example.com/`` with the
     content "home page" must pass ``after_response`` without RejectRequest.
     """
     base_url = "http://example.com/"
     content_hash = Simhash("response content").value
     for pattern in self.patterns:
         self.kb.soft_404_responses[base_url][pattern] = {
             "code": 200,
             "content_simhash": content_hash
         }
         # The mapping is rebound on every pass, so it ends up holding only
         # the last pattern.
         self.rule.performed[base_url] = {pattern: None}
     try:
         home_entry = self.create_entry(base_url, response_content="home page")
         await self.rule.after_response(home_entry)
     except RejectRequest:
         self.fail("Request rejected.")
    async def test_homepage_do_not_count_as_soft_404(self):
        """Check that the entry for the site root is not flagged as a soft 404.

        Signatures built from "response content" are stored for several path
        patterns, then an entry for ``http://example.com/`` with the content
        "home page" is run and its ``result.soft404`` is expected to be falsy.
        """
        simhash = Simhash("response content")
        # Raw strings keep the backslashes literal: \l, \d, \L and \i are not
        # Python escape sequences and make plain literals emit a warning.
        for pattern in [r"/\l/\d.html", r"/\d-\l.js", r"/\L/", r"/\i", r"/.\l.js"]:
            self.kb.soft_404_responses["http://example.com/"][pattern] = {
                "code": 200,
                "content_simhash": simhash
            }
            # The mapping is rebound on every pass, so it ends up holding only
            # the last pattern.
            self.rule.performed["http://example.com/"] = {pattern: None}
        entry = self.create_entry("http://example.com/",
                                  response_content="home page")

        await self.runner.perform_ok(entry)

        self.assertFalse(entry.result.soft404)
    async def test_reject_request_if_pattern_and_response_match_request_in_knowledge_base(
            self):
        """Check that every URL matching a stored pattern and signature is rejected.

        A signature built from "response content" is stored for each pattern
        in ``self.patterns``; ``after_response`` must then raise RejectRequest
        for each of the sample URLs.
        """
        base_url = "http://example.com/"
        for pattern in self.patterns:
            self.kb.soft_404_responses[base_url][pattern] = {
                "code": 200,
                "content_simhash": Simhash("response content").value
            }
            self.rule.performed[base_url][pattern] = None

        sample_paths = ["/test/123.html", "/123-test.js", "/TEST/", "/TesT", "/.test.js"]
        for path in sample_paths:
            url = urljoin(base_url, path)
            with self.assertRaises(RejectRequest):
                await self.rule.after_response(self.create_entry(url))
    async def test_dont_mark_as_soft404_if_no_match_in_knowledge_base(self):
        """Check that no entry is flagged soft404 when its content differs from the stored signatures.

        Every pattern is stored with a signature built from "response
        content", while the entries that are run carry the content "test".
        """
        simhash = Simhash("response content")
        # Raw strings keep the backslash literal: \l is not a Python escape
        # sequence and makes a plain literal emit a warning.
        for pattern in [r"/\l.html", r"/\l", r"/.\l", r"/\l.php"]:
            self.kb.soft_404_responses["http://example.com/"][
                pattern] = ContentSignature(code=200, content_simhash=simhash)
            self.rule.performed["http://example.com/"][pattern] = None
        entries = [
            self.create_entry("http://example.com/test.html",
                              response_content="test"),
            self.create_entry("http://example.com/test",
                              response_content="test"),
            self.create_entry("http://example.com/.test",
                              response_content="test"),
            self.create_entry("http://example.com/test.php",
                              response_content="test")
        ]
        for entry in entries:
            await self.runner.perform_ok(entry)

        self.assertFalse(any(entry.result.soft404 for entry in entries))
    async def test_mark_request_has_soft404_if_pattern_and_response_match_request_in_knowledge_base(
            self):
        """Check that every entry matching a stored pattern and signature is flagged soft404.

        A signature built from "response content" is stored for each pattern;
        the entries are created with ``create_entry``'s default content and
        all of them are expected to end up with a truthy ``result.soft404``.
        """
        # Raw strings keep the backslashes literal: \d, \l, \L and \i are not
        # Python escape sequences and make plain literals emit a warning.
        for pattern in [
                r"/test/\d.html", r"/\d-\l.js", r"/\L/", r"/\i", r"/abc/.\l.js"
        ]:
            simhash = Simhash("response content")
            self.kb.soft_404_responses["http://example.com/"][
                pattern] = ContentSignature(code=200, content_simhash=simhash)
            self.rule.performed["http://example.com/"][pattern] = None

        urls = [
            urljoin("http://example.com/", path) for path in [
                "/test/123.html", "/123-test.js", "/TEST/", "/TesT",
                "/abc/.test.js"
            ]
        ]
        entries = [self.create_entry(url) for url in urls]
        for entry in entries:
            await self.runner.perform_ok(entry)

        self.assertTrue(all(entry.result.soft404 for entry in entries))
    async def test_add_alternate_url_response_to_knowledge_base(self):
        """Check that ``after_response`` stores one signature per URL pattern.

        Four entries with different path shapes are processed. The knowledge
        base is expected to hold one entry per pattern, each with code 200 and
        the simhash of the response configured on ``self.engine`` (not of the
        entries' own content).
        """
        response = StaticResponse(200, {})
        response.content = "response content"
        self.engine.response = response

        for url in ("http://example.com/test",
                    "http://example.com/123/",
                    "http://example.com/.test",
                    "http://example.com/123/test.js"):
            await self.rule.after_response(
                self.create_entry(url, response_content="response"))

        simhash = Simhash(response.content).value
        # Raw strings keep the backslashes literal: \l and \d are not Python
        # escape sequences and make plain literals emit a warning.
        expected_patterns = (r"/\l", r"/\d/", r"/.\l", r"/\d/\l.js")
        self.assertEqual(
            self.kb.soft_404_responses["http://example.com/"], {
                pattern: {
                    "code": 200,
                    "content_simhash": simhash
                }
                for pattern in expected_patterns
            })
 async def test_dont_reject_request_if_no_match_in_knowledge_base(self):
     """Check that entries whose content differs from the stored signatures are not rejected.

     Every pattern is stored with the simhash value of "response content",
     while the entries passed to ``after_response`` carry the content "test";
     a RejectRequest from any of them fails the test.
     """
     simhash = Simhash("response content").value
     # Raw strings keep the backslash literal: \l is not a Python escape
     # sequence and makes a plain literal emit a warning.
     for pattern in [r"/\l.html", r"/\l", r"/.\l", r"/\l.php"]:
         self.kb.soft_404_responses["http://example.com/"][pattern] = {
             "code": 200,
             "content_simhash": simhash
         }
         self.rule.performed["http://example.com/"][pattern] = None
     try:
         for url in ("http://example.com/test.html",
                     "http://example.com/test",
                     "http://example.com/.test",
                     "http://example.com/test.php"):
             await self.rule.after_response(
                 self.create_entry(url, response_content="test"))
     except RejectRequest:
         self.fail("Request rejected.")
Пример #14
0
 def _responses_match(self, resp_simhash):
     """Yield, for each buffered value, whether ``resp_simhash`` is close to it.

     Each value in ``self.behavior_buffer`` is wrapped in a Simhash; the
     yielded boolean is True when its distance to ``resp_simhash`` is below
     ``self.match_threshold``.
     """
     for buffered_value in self.behavior_buffer:
         candidate = Simhash(buffered_value)
         distance = resp_simhash.distance(candidate)
         yield distance < self.match_threshold
    async def test_on_request_successful_store_simhash_of_response_content_in_knowledge_base(
            self):
        """Check that one successful request leaves exactly its content simhash value in the buffer."""
        await self.runner.perform_ok(self.entry)

        expected_buffer = [Simhash(self.entry.response.content).value]
        self.assertEqual(self.behavior_detection.behavior_buffer,
                         expected_buffer)
Пример #16
0
 def _create_simhash(self, content_response):
     """Build a Simhash from ``content_response``.

     ``self.match_filter`` and ``self.token_size`` are passed as the second
     and third positional arguments of the Simhash constructor.
     """
     fingerprint = Simhash(content_response, self.match_filter, self.token_size)
     return fingerprint