def test_delay_add_available(self):
    sessions = Sessions(delay=1)
    session = sessions.sessions[0]
    with patch('dirhunt.sessions.threading.Timer') as m:
        sessions.add_available(session)
        # With a delay configured, the session is re-queued through a timer
        # instead of being returned to the availables queue immediately.
        m.assert_called_once_with(sessions.delay, sessions.availables.put, [session])
        m.return_value.start.assert_called_once()
def callback(self, domain):
    url = VT_URL.format(domain=domain)
    session = Sessions().get_session()
    html = session.get(url).text
    if ABUSE in html:
        self.add_error(ABUSE_MESSAGE_ERROR.format(url=url))
        return
    soup = BeautifulSoup(html, 'html.parser')
    # Report every anchor in the VirusTotal "detected URLs" table as a result.
    for url in soup.select('#detected-urls .enum a'):
        self.add_result(url.text.strip(string.whitespace))
def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
             progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
             not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None,
             user_agent=None, cookies=None, headers=None):
    # Default pool size: five workers per CPU, or one worker per proxy when a delay is set.
    if not max_workers and not delay:
        max_workers = (multiprocessing.cpu_count() or 1) * 5
    elif not max_workers and delay:
        max_workers = len(proxies or [None])
    super(Crawler, self).__init__(max_workers)
    self.domains = set()
    self.results = Queue()
    self.index_of_processors = []
    self.proxies = proxies
    self.delay = delay
    self.sessions = Sessions(proxies, delay, user_agent, cookies, headers)
    self.processing = {}
    self.processed = {}
    self.add_lock = Lock()
    self.spinner = random_spinner()
    self.start_dt = datetime.datetime.now()
    self.interesting_extensions = interesting_extensions or []
    self.interesting_files = interesting_files or []
    self.closing = False
    self.std = std or None
    self.progress_enabled = progress_enabled
    self.timeout = timeout
    self.not_follow_subdomains = not_follow_subdomains
    self.depth = depth
    self.exclude_sources = exclude_sources
    self.sources = Sources(self.add_url, self.add_message, exclude_sources)
    self.not_allow_redirects = not_allow_redirects
    self.limit = limit
    self.current_processed_count = 0
    self.to_file = to_file
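A minimal usage sketch for the constructor above; the option values are illustrative assumptions, not project defaults, and only the keyword names come from the signature:

    # Hypothetical example: keyword names match the constructor, values are made up.
    crawler = Crawler(
        interesting_extensions=['php', 'zip'],
        interesting_files=['access_log'],
        timeout=10,
        depth=3,
        not_follow_subdomains=True,
        proxies=['http://10.1.2.3:3128'],
        delay=0.5,
        limit=1000,
        user_agent='dirhunt-example',
    )

When max_workers is omitted, the pool size falls back to five workers per CPU, or to one worker per configured proxy when a delay is set.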
def test_random_proxy_positive(self, m):
    proxy_instance = Mock()
    with patch('dirhunt.sessions.normalize_proxy', return_value=proxy_instance):
        sessions = Sessions()
        session = sessions.sessions[0]
        self.assertIs(session.proxy, proxy_instance)
        session_mock = Mock()
        session.session = session_mock
        session.get(self.url)
        proxy_instance.positive.assert_called_once()
def test_proxy(self):
    proxy = 'http://10.1.2.3:3128'
    sessions = Sessions([proxy])
    session = sessions.sessions[0]
    m = Mock()
    session.session = m
    session.get(self.url)
    m.get.assert_called_once_with(self.url, proxies={
        'http': proxy,
        'https': proxy,
    })
def _test_random_proxy_negative(self, votes):
    proxy_instance = Mock()
    proxy_instance.get_updated_proxy.return_value.votes = votes
    with patch('dirhunt.sessions.normalize_proxy', return_value=proxy_instance):
        sessions = Sessions()
        session = sessions.sessions[0]
        self.assertIs(session.proxy, proxy_instance)
        session_mock = Mock(**{'get.side_effect': ProxyError})
        session.session = session_mock
        with self.assertRaises(ProxyError):
            session.get(self.url)
    return proxy_instance
def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
             progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
             not_allow_redirects=False):
    super(Crawler, self).__init__(max_workers)
    self.domains = set()
    self.results = Queue()
    self.index_of_processors = []
    self.sessions = Sessions()
    self.processing = {}
    self.processed = {}
    self.add_lock = Lock()
    self.spinner = random_spinner()
    self.start_dt = datetime.datetime.now()
    self.interesting_extensions = interesting_extensions or []
    self.interesting_files = interesting_files or []
    self.closing = False
    self.std = std or None
    self.progress_enabled = progress_enabled
    self.timeout = timeout
    self.not_follow_subdomains = not_follow_subdomains
    self.depth = depth
    self.sources = Sources(self.add_url, exclude_sources)
    self.not_allow_redirects = not_allow_redirects
def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
             progress_enabled=True, timeout=10):
    super(Crawler, self).__init__(max_workers)
    self.domains = set()
    self.results = Queue()
    self.index_of_processors = []
    self.sessions = Sessions()
    self.processing = {}
    self.processed = {}
    self.add_lock = Lock()
    self.spinner = random_spinner()
    self.start_dt = datetime.datetime.now()
    self.interesting_extensions = interesting_extensions or []
    self.interesting_files = interesting_files or []
    self.closing = False
    self.std = std or None
    self.progress_enabled = progress_enabled
    self.timeout = timeout
def test_random_session(self):
    sessions = Sessions()
    # Drain the availables queue so get_session() has to pick a session at random.
    sessions.availables.get()
    with patch('dirhunt.sessions.random.choice') as m:
        sessions.get_session()
        m.assert_called_once()
def _get_url_info(self):
    return UrlInfo(Sessions(), Url(self.url))
def test_echo(self):
    mstdout = Mock(**{'isatty.return_value': True})
    UrlsInfo([], Sessions(), std=mstdout).echo('Foo')
    mstdout.write.assert_called()
def test_erase(self):
    mstdout = Mock(**{'isatty.return_value': True})
    UrlsInfo([], Sessions(), std=mstdout).erase()
    mstdout.write.assert_called_once()
def test_start_empty(self):
    with patch.object(UrlsInfo, 'submit') as m:
        UrlsInfo([], Sessions()).start()
    m.assert_not_called()
def test_callback(self):
    with patch.object(UrlsInfo, '_get_url_info') as m:
        UrlsInfo([self.url], Sessions()).callback(len(self.url), Url(self.url), 0)
    m.assert_called_once()