def create_probing_object(base_url, req_type, resp_handlers=None):
    """
    Loads the request data and response handlers supplied, and generates
    the respective EntryProbing instance

    :param base_url:      URL handed to HTTPProbingRequest
    :param req_type:      request method, passed as the ``method`` argument
                          of HTTPProbingRequest
    :param resp_handlers: iterable of dicts describing the response
                          handlers. Every dict needs the keys
                          'handler_type' ('text', 'http_status' or
                          'binary') and 'opposite'; 'text' handlers also
                          need 'text_match_value' and 'http_status'
                          handlers also need 'http_status'. None (the
                          default) means no handlers are attached.

    :returns: the EntryProbing instance with all handlers attached

    :raises AssertionError: if a handler has an unknown 'handler_type'
    :raises KeyError: if a handler dict lacks one of the required keys
    """

    # A None default avoids sharing one mutable list between calls
    if resp_handlers is None:
        resp_handlers = ()

    # Probing request
    probe = EntryProbing(HTTPProbingRequest(base_url, method=req_type))

    # Probing response
    for handler_data in resp_handlers:
        handler_type = handler_data['handler_type']

        if handler_type == 'text':
            resp_handler = TextMatchProbingResponse(
                text_match=handler_data['text_match_value'],
                opposite=handler_data['opposite'])
        elif handler_type == 'http_status':
            resp_handler = HTTPStatusProbingResponse(
                status_code=handler_data['http_status'],
                opposite=handler_data['opposite'])
        elif handler_type == 'binary':
            resp_handler = BinaryFormatProbingResponse(
                opposite=handler_data['opposite'])
        else:
            # Same exception type as before, now saying what went wrong
            raise AssertionError(
                f"Unknown response handler type: {handler_type!r}")

        probe.add_response_handler(resp_handler)

    return probe
def __filter_range(limits: Tuple[Union[int, datetime.date],
                                 Union[int, datetime.date]],
                   entry_probe: EntryProbing,
                   step_size: Union[int, relativedelta],
                   mid_calc: Callable[[Any, Any], Union[int, datetime.date]],
                   range_gen: Generator) -> Union[int, datetime.date, None]:
    """
    Does a binary search in the given range to discover which part of it
    actually contains results, but checks every entry yielded by range_gen
    around the midpoint before doing the division step. This method works
    for both dates and integers, and contains the barebones algorithm only.

    :param limits:      tuple with lower and upper limits for the range to
                        be checked (both limits are inclusive)
    :param entry_probe: instance of EntryProbing describing the request
                        method and response validation
    :param step_size:   value to be added to a given index to go to the
                        next one
    :param mid_calc:    function which takes the beginning and end of the
                        current range being considered and calculates the
                        midpoint
    :param range_gen:   generator function called with the current
                        midpoint and the beginning and end of the current
                        range; it yields all the points near the middle
                        that we need to check

    :returns: position where the last hit entry was found, None if no
              entries were found
    """

    # Validate inputs
    RangeInference.__range_validate_input(limits, entry_probe, step_size,
                                          mid_calc, range_gen)

    begin, end = limits

    last_hit = None
    delta = step_size
    curr_begin = begin
    curr_end = end

    # The comparison is inclusive so that a range which has shrunk to a
    # single entry (or started as one) still gets probed
    while curr_begin <= curr_end:
        mid = mid_calc(curr_begin, curr_end)

        # check the required number of entries before declaring a miss
        all_miss = True
        for i in range_gen(mid, curr_begin, curr_end):
            if entry_probe.check_entry(i):
                all_miss = False
                last_hit = i

        if all_miss:
            # nothing found around the midpoint: discard the upper half
            curr_end = mid - delta
        else:
            # resume the search right after the last hit of this round
            curr_begin = last_hit + delta

    return last_hit
def test_probing_param_errors(self):
    """
    Checks that the probing methods reject parameters of an invalid type
    """

    # A plain list is not accepted as the request handler
    with self.assertRaises(TypeError):
        EntryProbing([1])

    # A plain list is not accepted as a response handler either
    entry_probe = EntryProbing(GETProbingRequest("http://test.com/"))
    with self.assertRaises(TypeError):
        entry_probe.add_response_handler([1])
def test_probing_not_found(self):
    """
    Covers the general working cases for probing an entry which was not
    found
    """

    def new_probe():
        # Every case starts from a fresh probe on the same URL
        return EntryProbing(GETProbingRequest("http://test.com/"))

    # Expects a 200 code together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), False)

    # Expects any code but 404 together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404, opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), False)

    # Expects a 404 code together with the string "entry not found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404))
     .add_response_handler(TextMatchProbingResponse("entry not found")))
    self.assertEqual(probe.check_entry(), True)

    # Expects any code but 503
    probe = new_probe()
    probe.add_response_handler(
        HTTPStatusProbingResponse(503, opposite=True))
    self.assertEqual(probe.check_entry(), True)

    # The response must be empty before probing and stored afterwards
    probe = new_probe()
    self.assertIsNone(probe.response)
    probe.check_entry()
    self.assertIsInstance(probe.response, mock.Mock)
def test_probing_found(self):
    """
    Covers the general working cases for probing an entry which was found
    """

    def new_probe():
        # Every case starts from a fresh probe on the same URL
        return EntryProbing(GETProbingRequest("http://test.com/"))

    # Expects a 200 code, a non-binary (text) type and the string
    # "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse(opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), True)

    # Same handlers, but now requiring a binary file
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse())
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), False)

    # Expects any code but 404 together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404, opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), True)

    # Expects a 404 code, a 200 code and the string "entry found"
    # at once (should always fail)
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404))
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), False)

    # No handlers at all (should default to True)
    probe = new_probe()
    self.assertEqual(probe.check_entry(), True)

    # The response must be empty before probing and stored afterwards
    probe = new_probe()
    self.assertIsNone(probe.response)
    probe.check_entry()
    self.assertIsInstance(probe.response, mock.Mock)
def test_probing_found_sync(self):
    """
    Covers the general working cases for probing a found entry through the
    non asynchronous method. Only GET requests without extra parameters
    are used here, since the different request methods are tested in a
    separate test file.
    """

    # Makes HTTPProbingRequest use our mock whenever a GET is requested
    HTTPProbingRequest.REQUEST_METHODS["GET"] = self.response_200

    def new_probe():
        # Every case starts from a fresh probe on the same URL
        return EntryProbing(
            HTTPProbingRequest("http://test.com/", method="GET"))

    # Expects a 200 code, a non-binary (text) type and the string
    # "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse(opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertTrue(probe.check_entry())

    # Same handlers, but now requiring a binary file
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse())
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertFalse(probe.check_entry())

    # Expects any code but 404 together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404, opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertTrue(probe.check_entry())

    # Expects a 404 code, a 200 code and the string "entry found"
    # at once (should always fail)
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404))
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertFalse(probe.check_entry())

    # No handlers at all (should default to True)
    probe = new_probe()
    self.assertTrue(probe.check_entry())

    # The response must be empty before probing and stored afterwards
    probe = new_probe()
    self.assertIsNone(probe.response)
    # always_request is set to True so that we have a response
    probe.check_entry(always_request=True)
    self.assertIsInstance(probe.response, ResponseData)
def test_probing_not_found_async(self):
    """
    Covers the general working cases for probing a not found entry through
    the asynchronous method
    """

    # Mock of the page to be accessed
    page = create_mock_pyp_page("text/html", 404, "entry not found")

    def new_probe():
        # Every case starts from a fresh probe on the same mock page
        return EntryProbing(PyppeteerProbingRequest(page))

    def run_check(entry_probe, **kwargs):
        # Drives the coroutine to completion on the test's event loop
        return self.loop.run_until_complete(
            entry_probe.async_check_entry(**kwargs))

    # Expects a 200 code together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertFalse(run_check(probe))

    # Expects any code but 404 together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404, opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertFalse(run_check(probe))

    # Expects a 404 code together with the string "not found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404))
     .add_response_handler(TextMatchProbingResponse("not found")))
    self.assertTrue(run_check(probe))

    # Expects any code but 503
    probe = new_probe()
    probe.add_response_handler(
        HTTPStatusProbingResponse(503, opposite=True))
    self.assertTrue(run_check(probe))

    # The response must be empty before probing and stored afterwards
    probe = new_probe()
    self.assertIsNone(probe.response)
    # always_request is set to True so that we have a response
    run_check(probe, always_request=True)
    self.assertIsInstance(probe.response, ResponseData)
def test_probing_not_found_sync(self):
    """
    Covers the general working cases for probing a not found entry through
    the non asynchronous method
    """

    # Makes HTTPProbingRequest use our mock whenever a GET is requested
    HTTPProbingRequest.REQUEST_METHODS["GET"] = self.response_404

    def new_probe():
        # Every case starts from a fresh probe on the same URL
        return EntryProbing(
            HTTPProbingRequest("http://test.com/", method="GET"))

    # Expects a 200 code together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), False)

    # Expects any code but 404 together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404, opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertEqual(probe.check_entry(), False)

    # Expects a 404 code together with the string "not found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404))
     .add_response_handler(TextMatchProbingResponse("not found")))
    self.assertEqual(probe.check_entry(), True)

    # Expects any code but 503
    probe = new_probe()
    probe.add_response_handler(
        HTTPStatusProbingResponse(503, opposite=True))
    self.assertEqual(probe.check_entry(), True)

    # The response must be empty before probing and stored afterwards
    probe = new_probe()
    self.assertIsNone(probe.response)
    # always_request is set to True so that we have a response
    probe.check_entry(always_request=True)
    self.assertIsInstance(probe.response, ResponseData)
def test_probing_binary_async(self):
    """
    Covers the general working cases for probing a page with binary
    content through the asynchronous method with Pyppeteer
    """

    # Mock of the page to be accessed: binary MIME type and 200 status
    # code. The text "entry found" is included so we can check that it is
    # properly discarded.
    page = create_mock_pyp_page("application/octet-stream", 200,
                                "entry found")

    def new_probe():
        # Every case starts from a fresh probe on the same mock page
        return EntryProbing(PyppeteerProbingRequest(page))

    def run_check(entry_probe):
        # Drives the coroutine to completion on the test's event loop
        return self.loop.run_until_complete(
            entry_probe.async_check_entry())

    # Expects a 200 code and binary content
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse()))
    self.assertTrue(run_check(probe))

    # Same as above, but requiring text content instead
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse(opposite=True)))
    self.assertFalse(run_check(probe))

    # Looks for the string "entry found" in the content (should fail
    # since the text is ignored)
    probe = new_probe()
    probe.add_response_handler(TextMatchProbingResponse("entry found"))
    self.assertFalse(run_check(probe))
def test_probing_binary_sync(self):
    """
    Covers the general cases for probing a page with binary content
    through the non asynchronous method.
    """

    # Makes HTTPProbingRequest use our mock whenever a GET is requested
    HTTPProbingRequest.REQUEST_METHODS["GET"] = self.response_binary

    def new_probe():
        # Every case starts from a fresh probe on the same URL
        return EntryProbing(
            HTTPProbingRequest("http://test.com/", method="GET"))

    # Expects a 200 code and binary content
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse()))
    self.assertTrue(probe.check_entry())

    # Same as above, but requiring text content instead
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse(opposite=True)))
    self.assertFalse(probe.check_entry())

    # Looks for the string "entry found" in the content (should fail
    # since the text is ignored)
    probe = new_probe()
    probe.add_response_handler(TextMatchProbingResponse("entry found"))
    self.assertFalse(probe.check_entry())
def test_probing_found_async(self):
    """
    Covers the general working cases for probing a found entry through the
    asynchronous method with Pyppeteer
    """

    # Mock of the page to be accessed
    page = create_mock_pyp_page("text/html", 200, "entry found")

    def new_probe():
        # Every case starts from a fresh probe on the same mock page
        return EntryProbing(PyppeteerProbingRequest(page))

    def run_check(entry_probe, **kwargs):
        # Drives the coroutine to completion on the test's event loop
        return self.loop.run_until_complete(
            entry_probe.async_check_entry(**kwargs))

    # Expects a non-binary (text) type, a 200 code and the string
    # "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(BinaryFormatProbingResponse(opposite=True))
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertTrue(run_check(probe))

    # Same checks, but now requiring a binary file
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(BinaryFormatProbingResponse())
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertFalse(run_check(probe))

    # Expects any code but 404 together with the string "entry found"
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404, opposite=True))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertTrue(run_check(probe))

    # Expects a 404 code, a 200 code and the string "entry found"
    # at once (should always fail)
    probe = new_probe()
    (probe
     .add_response_handler(HTTPStatusProbingResponse(404))
     .add_response_handler(HTTPStatusProbingResponse(200))
     .add_response_handler(TextMatchProbingResponse("entry found")))
    self.assertFalse(run_check(probe))

    # No handlers at all (should default to True)
    probe = new_probe()
    self.assertTrue(run_check(probe))

    # The response must be empty before probing and stored afterwards
    probe = new_probe()
    self.assertIsNone(probe.response)
    # always_request is set to True so that we have a response
    run_check(probe, always_request=True)
    self.assertIsInstance(probe.response, ResponseData)
def __filter_range(
    limits: Tuple[Union[int, datetime.date], Union[int, datetime.date]],
    entry_probe: EntryProbing,
    step_size: Union[int, relativedelta],
    mid_calc: Callable[[Any, Any], Union[int, datetime.date]],
    range_gen: Generator,
    extra_params: Optional[List[Any]] = None,
    preprocess: Callable[[Any], Any] = lambda x: x
) -> Optional[Union[int, datetime.date]]:
    """
    Does a binary search in the given range to discover which part of it
    actually contains results, but checks every entry yielded by range_gen
    around the midpoint before doing the division step. This method works
    for both dates and integers, and contains the barebones algorithm only.

    :param limits:       tuple with lower and upper limits for the range
                         to be checked (both limits are inclusive)
    :param entry_probe:  instance of EntryProbing describing the request
                         method and response validation
    :param step_size:    value to be added to a given index to go to the
                         next one
    :param mid_calc:     function which takes the beginning and end of the
                         current range being considered and calculates
                         the midpoint
    :param range_gen:    generator function called with the current
                         midpoint and the beginning and end of the
                         current range; it yields all the points near the
                         middle that we need to check
    :param extra_params: list of extra parameters to be sent during
                         probing (must include one "None" entry, which
                         represents the position for the filtered
                         parameter). The list itself is not modified, a
                         copy is filled in for each probe.
    :param preprocess:   single-argument function applied to each
                         generated entry before it is sent to the probe
                         (identity function by default)

    :returns: position where the last hit entry was found (the raw value
              yielded by range_gen, not the preprocessed one), None if no
              entries were found

    :raises ValueError: from ``list.index`` if extra_params has no None
                        entry. NOTE(review): the validation helper
                        presumably rejects that case first - confirm.
    """

    # Validate inputs
    RangeInference.__range_validate_input(limits, entry_probe, step_size,
                                          mid_calc, range_gen,
                                          extra_params, preprocess)

    begin, end = limits

    last_hit = None
    delta = step_size
    curr_begin = begin
    curr_end = end

    # If extra_params is None, we insert this parameter as the only element
    # in a list (we signal this by setting the only entry to None)
    if extra_params is None:
        extra_params = [None]

    # Slot of the parameter list which receives the value being searched
    param_index = extra_params.index(None)
    # Work on a copy so that the caller's list is left untouched
    params_instance = extra_params.copy()

    while curr_begin <= curr_end:
        mid = mid_calc(curr_begin, curr_end)

        # check the required number of entries before declaring a miss
        all_miss = True
        for i in range_gen(mid, curr_begin, curr_end):
            params_instance[param_index] = preprocess(i)
            if entry_probe.check_entry(params_instance):
                all_miss = False
                last_hit = i

        if all_miss:
            # nothing found around the midpoint: discard the upper half
            curr_end = mid - delta
        else:
            # resume the search right after the last hit of this round
            curr_begin = last_hit + delta

    return last_hit