    def test_add_exceed_memory(self):
        q = OrderedCachedQueue(maxsize=2)

        # These two go to the in memory queue
        q.put(create_simple_fuzzable_request(1))
        q.put(create_simple_fuzzable_request(2))

        self.assertEqual(q.qsize(), 2)
        self.assertEqual(len(q.memory), 2)

        # This one goes to the disk queue
        q.put(create_simple_fuzzable_request(3))

        self.assertEqual(q.qsize(), 3)
        self.assertEqual(len(q.memory), 2)
        self.assertEqual(len(q.disk), 1)

        # Get all
        self.assertEqual(read_fuzzable_request_parameter(q.get()), 1)
        self.assertEqual(len(q.memory), 1)
        self.assertEqual(len(q.disk), 1)

        self.assertEqual(read_fuzzable_request_parameter(q.get()), 2)
        self.assertEqual(len(q.memory), 0)
        self.assertEqual(len(q.disk), 1)

        self.assertEqual(read_fuzzable_request_parameter(q.get()), 3)
        self.assertEqual(len(q.memory), 0)
        self.assertEqual(len(q.disk), 0)

        self.assertEqual(q.qsize(), 0)
    def test_put_same_fuzzable_request_twice(self):
        q = OrderedCachedQueue(maxsize=2)

        q.put(create_simple_fuzzable_request(1))
        q.put(create_simple_fuzzable_request(1))

        self.assertEqual(q.get(), q.get())
    def test_exceptions_no_fail_sync_pointer(self):
        q = OrderedCachedQueue(maxsize=2)
        q.put(create_simple_fuzzable_request(1))
        q.get()

        self.assertRaises(Exception, q.get, block=False)

        q.put(create_simple_fuzzable_request(1))
        self.assertEqual(read_fuzzable_request_parameter(q.get()), 1)
    def test_put_none_after_fuzzable_request(self):
        q = OrderedCachedQueue(maxsize=2)

        q.put(create_simple_fuzzable_request(1))
        q.put(None)

        # Reads the fuzzable request
        q.get()

        # Reads the None
        self.assertIsNone(q.get())
    def test_prefer_memory_over_disk(self):
        q = OrderedCachedQueue(maxsize=2)

        # These two go to the in memory queue
        q.put(create_simple_fuzzable_request(1))
        q.put(create_simple_fuzzable_request(2))

        # This one goes to the disk queue
        q.put(create_simple_fuzzable_request(3))

        # Read one from memory
        q.get()

        self.assertEqual(len(q.memory), 1)
        self.assertEqual(len(q.disk), 1)

        # Write one to memory
        q.put(create_simple_fuzzable_request(3))

        self.assertEqual(len(q.memory), 2)
        self.assertEqual(len(q.disk), 1)
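The two cache tests above (test_add_exceed_memory and test_prefer_memory_over_disk) exercise the same policy: up to maxsize items live in memory, the rest spill to disk, and a memory slot is reused as soon as a get() frees it. The sketch below is a toy illustration of that policy only, not the real OrderedCachedQueue code; the attribute names memory, disk and maxsize are borrowed from the assertions above, and the real queue uses a disk-backed structure rather than a plain dict.

# Illustrative toy policy only -- NOT the real OrderedCachedQueue
# implementation. It mirrors what the assertions above check: prefer
# memory while there is room, spill to disk otherwise.
class ToyCachedQueuePolicy(object):
    def __init__(self, maxsize):
        self.maxsize = maxsize
        self.memory = {}
        self.disk = {}      # the real queue keeps this on disk

    def put(self, fuzzable_request):
        # Key by hash, which is what the ordered reads are based on
        key = fuzzable_request.get_hash()
        if len(self.memory) < self.maxsize:
            self.memory[key] = fuzzable_request
        else:
            self.disk[key] = fuzzable_request

    def get(self):
        # Serve from memory first; fall back to disk when memory is empty,
        # returning the smallest hash within the chosen store
        source = self.memory if self.memory else self.disk
        return source.pop(min(source))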
    def test_simple_rpm_speed(self):
        q = OrderedCachedQueue()

        self.assertEqual(0.0, q.get_input_rpm())
        self.assertEqual(0.0, q.get_output_rpm())

        for i in xrange(4):
            q.put(create_simple_fuzzable_request(i))
            # 20 RPM
            time.sleep(3)

        self.assertEqual(q.qsize(), 4)
        self.assertGreater(q.get_input_rpm(), 19)
        self.assertLess(q.get_input_rpm(), 20)

        for i in xrange(4):
            q.get()
            # 60 RPM
            time.sleep(1)

        self.assertGreater(q.get_output_rpm(), 59)
        self.assertLess(q.get_output_rpm(), 60)

        self.assertEqual(q.qsize(), 0)
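The bounds asserted above follow from simple arithmetic: each put() is followed by a 3 second sleep and each get() by a 1 second sleep, so the ideal rates are 20 and 60 items per minute. The measured elapsed time is slightly longer than the sleeps alone (presumably because of the overhead of the calls themselves), which is why the test checks open intervals just below those ideal values.

# Sanity check for the rates expected in test_simple_rpm_speed:
# 4 put() calls spread over ~4 * 3 = 12 seconds -> ~20 items per minute
# 4 get() calls spread over ~4 * 1 = 4 seconds  -> ~60 items per minute
expected_input_rpm = 4 / (12 / 60.0)    # 20.0
expected_output_rpm = 4 / (4 / 60.0)    # 60.0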
    def test_read_in_order(self):
        q = OrderedCachedQueue(maxsize=2)

        hash_list = []

        for i in xrange(5):
            fr = create_simple_fuzzable_request(i)
            hash_list.append(fr.get_hash())
            q.put(fr)

        unordered_hash_list = hash_list[:]
        hash_list.sort()

        self.assertNotEqual(unordered_hash_list, hash_list)

        for i in xrange(4):
            fr = q.get()
            self.assertEqual(fr.get_hash(), hash_list[i])
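The tests above rely on two helpers that are defined elsewhere in the test module. A minimal sketch of what they could look like follows; the exact URL, the parameter parsing and the import paths are assumptions, and the real helpers in the w3af test suite may differ.

# Hypothetical versions of the helpers used by the tests above -- an
# illustrative sketch only, not the real test suite implementation.
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.request.fuzzable_request import FuzzableRequest


def create_simple_fuzzable_request(unique_id):
    # One distinct query string value per unique_id, so every request
    # produces a different get_hash()
    return FuzzableRequest(URL('http://w3af.org/?id=%s' % unique_id))


def read_fuzzable_request_parameter(fuzzable_request):
    # Recover the integer that create_simple_fuzzable_request() encoded
    # in the `id` query string parameter
    uri = str(fuzzable_request.get_uri())
    return int(uri.rsplit('=', 1)[1])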
class CrawlInfrastructure(BaseConsumer):
    """
    Consumer thread that takes fuzzable requests from the input Queue that is
    seeded by the core, sends each fr to all crawl and infrastructure plugins,
    get()'s the output from those plugins and puts them in the input Queue
    again for continuing with the discovery process.
    """

    def __init__(self, crawl_infrastructure_plugins, w3af_core,
                 max_discovery_time):
        """
        :param crawl_infrastructure_plugins: Instances of CrawlInfrastructure
                                             plugins in a list
        :param w3af_core: The w3af core that we'll use for status reporting
        :param max_discovery_time: The max time (in seconds) to use for the
                                   discovery phase
        """
        super(CrawlInfrastructure, self).__init__(crawl_infrastructure_plugins,
                                                  w3af_core,
                                                  thread_name=self.get_name(),
                                                  max_pool_queued_tasks=100)
        self._max_discovery_time = int(max_discovery_time)

        # For filtering fuzzable requests found by plugins:
        self._variant_db = VariantDB()

        self._disabled_plugins = set()
        self._running = True
        self._report_max_time = True
        self._reported_found_urls = ScalableBloomFilter()

        # Override BaseConsumer.in_queue in order to have an ordered queue for
        # our crawling process.
        #
        # Read OrderedCachedQueue's documentation to understand why order is
        # important
        self.in_queue = OrderedCachedQueue(maxsize=10,
                                           name=self.get_name() + 'In')

    def get_name(self):
        return 'CrawlInfra'

    def run(self):
        """
        Consume the queue items, sending them to the plugins which are then
        going to find vulnerabilities, new URLs, etc.
        """
        while True:
            try:
                work_unit = self.in_queue.get(timeout=0.1)
            except KeyboardInterrupt:
                # https://github.com/andresriancho/w3af/issues/9587
                #
                # If we don't do this, the thread will die and will never
                # process the POISON_PILL, which will end up in an endless
                # wait for .join()
                continue
            except Queue.Empty:
                # pylint: disable=E1120
                try:
                    self._route_all_plugin_results()
                except KeyboardInterrupt:
                    continue
                # pylint: enable=E1120
            else:
                if work_unit == POISON_PILL:
                    try:
                        # Close the pool and wait for everyone to finish
                        self._threadpool.close()
                        self._threadpool.join()
                        self._threadpool = None
                        self._running = False

                        self._teardown()
                    finally:
                        # Finish this consumer and everyone consuming the
                        # output
                        self._out_queue.put(POISON_PILL)
                        self.in_queue.task_done()
                        self.set_has_finished()
                    break
                else:
                    # With specific error/success handling just for debugging
                    try:
                        self._consume(work_unit)
                    finally:
                        self.in_queue.task_done()

                    # Free memory
                    work_unit = None

    def _teardown(self, plugin=None):
        """
        End plugins
        """
        to_teardown = self._consumer_plugins
        if plugin is not None:
            to_teardown = [plugin]

        # When we disable a plugin because it raised a RunOnceException,
        # we call .end(), so no need to call the same method twice
        to_teardown = set(to_teardown) - self._disabled_plugins

        msg = 'Starting CrawlInfra consumer _teardown() with %s plugins.'
        om.out.debug(msg % len(to_teardown))

        for plugin in to_teardown:
            om.out.debug('Calling %s.end().' % plugin.get_name())
            start_time = time.time()

            try:
                plugin.end()
            except ScanMustStopException:
                # If we reach this exception here we don't care much
                # since the scan is ending already. The log message stating
                # that the scan will end because of this error was already
                # delivered by the HTTP client.
                #
                # We `pass` instead of `break` because some plugins might
                # still be able to `end()` without sending HTTP requests to
                # the remote server
                msg_fmt = ('Spent %.2f seconds running %s.end() until a'
                           ' scan must stop exception was raised.')
                self._log_end_took(msg_fmt, start_time, plugin)
            except Exception, e:
                msg_fmt = ('Spent %.2f seconds running %s.end() until an'
                           ' unhandled exception was found.')
                self._log_end_took(msg_fmt, start_time, plugin)

                self.handle_exception('crawl', plugin.get_name(),
                                      'plugin.end()', e)
            else:
                msg_fmt = 'Spent %.2f seconds running %s.end().'
                self._log_end_took(msg_fmt, start_time, plugin)
            finally: