def test_proxy_negotiate_fail(self, cr_conn_mock):
    """A SocksError raised during proxy negotiation must propagate
    out of ``connector.connect``."""
    event_loop = mock.Mock()
    cr_conn_mock.side_effect = \
        fake_coroutine(aiosocks.SocksError()).side_effect

    request = ClientRequest('GET', 'http://python.org', loop=self.loop)
    connector = SocksConnector(aiosocks.Socks5Addr('127.0.0.1'),
                               None, loop=event_loop)
    event_loop.getaddrinfo = fake_coroutine([mock.MagicMock()])

    with self.assertRaises(aiosocks.SocksError):
        self.loop.run_until_complete(connector.connect(request))
def get_tor_connector(string):
    """Build a SOCKS5 ``SocksConnector`` from a ``host:port`` proxy string.

    Credentials come from ``generate_credentials()``; DNS resolution is
    delegated to the proxy (``remote_resolve=True``).
    """
    host, port = parse_proxy_address(string)
    login, password = generate_credentials()
    return SocksConnector(
        proxy=aiosocks.Socks5Addr(host, int(port)),
        proxy_auth=aiosocks.Socks5Auth(login, password=password),
        remote_resolve=True,
    )
def test_connect_remote_resolve(self, cr_conn_mock):
    """With ``remote_resolve=True`` only the proxy address is resolved
    locally (exactly one ``_resolve_host`` call)."""
    transport = mock.Mock(name='transport')
    protocol = mock.Mock(name='protocol')
    cr_conn_mock.side_effect = \
        fake_coroutine((transport, protocol)).side_effect

    request = ClientRequest('GET', 'http://python.org', loop=self.loop)
    connector = SocksConnector(aiosocks.Socks5Addr('127.0.0.1'), None,
                               loop=self.loop, remote_resolve=True)
    connector._resolve_host = fake_coroutine([mock.MagicMock()])

    connection = self.loop.run_until_complete(connector.connect(request))
    self.assertEqual(connector._resolve_host.call_count, 1)
    connection.close()
def test_fingerprint_success(self):
    """HTTPS through a SOCKS4 proxy succeeds when the expected TLS
    certificate fingerprint matches the server's."""
    with fake_socks4_srv(self.loop) as proxy_port:
        proxy_addr = aiosocks.Socks4Addr('127.0.0.1', proxy_port)
        # Fingerprint of the test server's certificate.
        expected_fp = (b's\x93\xfd:\xed\x08\x1do\xa9\xaeq9'
                       b'\x1a\xe3\xc5\x7f\x89\xe7l\xf9')
        connector = SocksConnector(proxy=proxy_addr, proxy_auth=None,
                                   loop=self.loop, remote_resolve=False,
                                   verify_ssl=False,
                                   fingerprint=expected_fp)

        with http_srv(self.loop, use_ssl=True) as url:
            with aiohttp.ClientSession(connector=connector,
                                       loop=self.loop) as session:
                @asyncio.coroutine
                def make_req():
                    return (yield from session.request('get', url=url))

                response = self.loop.run_until_complete(make_req())
                self.assertEqual(response.status, 200)

                body = self.loop.run_until_complete(response.text())
                self.assertEqual(body, 'Test message')
                response.close()
def test_https_connect(self):
    """An HTTPS request routed through a SOCKS4 proxy returns the test
    page with status 200."""
    with fake_socks4_srv(self.loop) as proxy_port:
        proxy_addr = aiosocks.Socks4Addr('127.0.0.1', proxy_port)
        connector = SocksConnector(proxy=proxy_addr, proxy_auth=None,
                                   loop=self.loop, remote_resolve=False,
                                   verify_ssl=False)

        with http_srv(self.loop, use_ssl=True) as url:
            with aiohttp.ClientSession(connector=connector,
                                       loop=self.loop) as session:
                @asyncio.coroutine
                def make_req():
                    return (yield from session.request('get', url=url))

                response = self.loop.run_until_complete(make_req())
                self.assertEqual(response.status, 200)

                body = self.loop.run_until_complete(response.text())
                self.assertEqual(body, 'Test message')
                response.close()
async def worker(urls_q, proxies_q, proxies_q_good):
    """Consume keywords from ``urls_q``, query Google through SOCKS5
    proxies, and append the scraped position for each keyword to
    ``data/google_positions_result.txt``.

    Proxies that complete a request are recycled via ``proxies_q_good``;
    on failure the keyword is re-queued and the proxy is dropped.
    """
    while True:
        # Prefer a proxy that has already worked; otherwise take an
        # untested one from the general pool.
        if not proxies_q_good.empty():
            proxy = await proxies_q_good.get()
        else:
            proxy = await proxies_q.get()
        if proxy is None:
            await asyncio.sleep(1)
            continue
        row = 'http://{host}:{port}'.format(host=proxy.host,
                                            port=proxy.port)

        # No keywords left: this worker is finished.
        if urls_q.empty():
            return
        keyword = await urls_q.get()
        url = base_link.format(keyword.replace(' ', '+'))
        # NOTE(review): prints the literal string 'url' rather than the
        # url variable — kept as-is to preserve output.
        print(row, '-->', 'url',
              '| WPQ:', proxies_q_good.qsize(),
              '| APQ:', proxies_q.qsize())

        # Fetch the results page through the SOCKS5 proxy; any failure
        # re-queues the keyword and discards this proxy.
        try:
            addr = aiosocks.Socks5Addr(proxy.host, proxy.port)
            conn = SocksConnector(proxy=addr)
            with async_timeout.timeout(30):
                async with aiohttp.ClientSession(
                        connector=conn) as http_client:
                    async with http_client.get(url,
                                               headers=headers) as resp:
                        assert resp.status == 200
                        code = await resp.text()
                        assert 'body' in code
        except Exception as e:
            print(type(e), e, '[worker, http_client.Exception]')
            await urls_q.put(keyword)
            continue

        # The proxy worked — recycle it into the good-proxies queue.
        proxies_q_good.put_nowait(proxy)

        # Extract the keyword's position and append it to the results.
        try:
            position = get_position(code)
            with open('data/google_positions_result.txt', 'a',
                      encoding='utf-8') as result:
                result.write('{}\t{}\n'.format(keyword, position))
        except Exception as e:
            print(type(e), e, '[data_formatting Exception]')
            continue
        await asyncio.sleep(1)
def test_connect_proxy_domain(self, cr_conn_mock):
    """A proxy given as a hostname must be resolved exactly once via
    the connector's own ``_resolve_host``."""
    tr, proto = mock.Mock(name='transport'), mock.Mock(name='protocol')
    cr_conn_mock.side_effect = \
        fake_coroutine((tr, proto)).side_effect

    loop_mock = mock.Mock()
    req = ClientRequest('GET', 'http://python.org', loop=self.loop)
    connector = SocksConnector(aiosocks.Socks5Addr('proxy.example'),
                               None, loop=loop_mock)
    connector._resolve_host = fake_coroutine([mock.MagicMock()])

    conn = self.loop.run_until_complete(connector.connect(req))

    # BUG FIX: `.is_called` is an auto-created child Mock attribute and
    # is always truthy, so the original assertion could never fail.
    # The real Mock flag is `.called`.
    self.assertTrue(connector._resolve_host.called)
    self.assertEqual(connector._resolve_host.call_count, 1)
    self.assertIs(conn._transport, tr)

    conn.close()
async def main():
    """Demo: print the direct public IP, then the IP as seen through a
    local Tor SOCKS5 proxy, to show ``SocksConnector`` usage."""
    # Direct request — shows the machine's real public address.
    response = await aiohttp.get('http://icanhazip.com/')
    body = await response.text()
    print('ip: {}'.format(body.strip()))

    # Same request routed through Tor (SOCKS5 on localhost:9050).
    tor_addr = aiosocks.Socks5Addr('127.0.0.1', 9050)
    tor_conn = SocksConnector(proxy=tor_addr, remote_resolve=False)
    response = await aiohttp.get('http://icanhazip.com/',
                                 connector=tor_conn)
    body = await response.text()
    print('tor ip: {}'.format(body.strip()))
def test_connect_proxy_ip(self, cr_conn_mock):
    """Connecting through a proxy given as an IP literal must still go
    through the event loop's ``getaddrinfo``."""
    tr, proto = mock.Mock(name='transport'), mock.Mock(name='protocol')
    cr_conn_mock.side_effect = \
        fake_coroutine((tr, proto)).side_effect

    loop_mock = mock.Mock()
    req = ClientRequest('GET', 'http://python.org', loop=self.loop)
    connector = SocksConnector(aiosocks.Socks5Addr('127.0.0.1'),
                               None, loop=loop_mock)
    loop_mock.getaddrinfo = fake_coroutine([mock.MagicMock()])

    conn = self.loop.run_until_complete(connector.connect(req))

    # BUG FIX: `.is_called` is an auto-created child Mock attribute and
    # is always truthy, making the original assertion vacuous.  The
    # real Mock flag is `.called`.
    self.assertTrue(loop_mock.getaddrinfo.called)
    self.assertIs(conn._transport, tr)

    conn.close()
async def worker(urls_q, proxies_q, proxies_q_good):
    """Check Google's cache for queued links through SOCKS5 proxies.

    Pulls ``(page_url, link_text, link_url)`` triples from ``urls_q``,
    fetches the Google cache page for ``link_url`` via a proxy, and
    appends an ``indexed``/``no`` verdict line to
    ``data/google_cache_result.txt``.  Working proxies are recycled
    through ``proxies_q_good``; on failure the triple is re-queued.
    """
    while True:
        # Prefer a proxy that has already worked; otherwise take an
        # untested one from the general pool.
        if not proxies_q_good.empty():
            proxy = await proxies_q_good.get()
        else:
            proxy = await proxies_q.get()
        if proxy is None:
            await asyncio.sleep(1)
            continue
        row = 'http://{host}:{port}'.format(host=proxy.host,
                                            port=proxy.port)

        # No links left: this worker is finished.
        if urls_q.empty():
            return
        page_url, link_text, link_url = await urls_q.get()
        url = base_link.format(quote('cache:' + link_url))
        print(row, '-->', url,
              '| WPQ:', proxies_q_good.qsize(),
              '| APQ:', proxies_q.qsize())

        # Fetch the cache page through the SOCKS5 proxy.
        try:
            addr = aiosocks.Socks5Addr(proxy.host, proxy.port)
            conn = SocksConnector(proxy=addr)
            with async_timeout.timeout(30):
                async with aiohttp.ClientSession(
                        connector=conn) as http_client:
                    async with http_client.get(url,
                                               headers=headers) as resp:
                        # assert resp.status == 200
                        code = await resp.text()
                        assert (link_url in code or 'Not Found' in code)
        except Exception as e:
            print(type(e), e, '[worker, http_client.Exception]', link_url)
            # BUG FIX: `tuple(a, b, c)` raises TypeError (tuple() takes
            # a single iterable argument); use a tuple literal so the
            # failed item really is re-queued instead of crashing the
            # worker inside the except handler.
            await urls_q.put((page_url, link_text, link_url))
            continue

        # The proxy worked — recycle it into the good-proxies queue.
        proxies_q_good.put_nowait(proxy)

        # Record whether the link appears in Google's cache.
        try:
            indexed = get_data(code, page_url)
            with open('data/google_cache_result.txt', 'a',
                      encoding='utf-8') as result:
                if indexed:
                    result.write('{}; {}\n'.format(link_url, 'indexed'))
                else:
                    result.write('{}; {}\n'.format(link_url, 'no'))
        except Exception as e:
            print(type(e), e, '[data_formatting Exception]', link_url)
            continue
        await asyncio.sleep(1)
def socks_connector(proxy, loop=None):
    """Create a ``SocksConnector`` from a proxy URL string.

    ``socks4://`` URLs produce a Socks4Addr, anything else a Socks5Addr.
    A user/password pair in the URL becomes Socks5Auth credentials
    (regardless of scheme, matching the original behavior).
    """
    loop = loop or get_event_loop()
    proxy = URL(proxy)

    if proxy.scheme == 'socks4':
        addr = Socks4Addr(proxy.host, proxy.port)
    else:
        addr = Socks5Addr(proxy.host, proxy.port)

    auth = (Socks5Auth(proxy.user, proxy.password)
            if proxy.user and proxy.password else None)

    return SocksConnector(proxy=addr, proxy_auth=auth, limit=300,
                          loop=loop, remote_resolve=False,
                          verify_ssl=False)
def __init__(self, adress, port, login=None, password=None,
             timeout=10, loop=None):
    """Set up an aiohttp session routed through a SOCKS5 proxy.

    When both ``login`` and ``password`` are given, the proxy handshake
    uses username/password authentication; otherwise no auth is sent.
    """
    super().__init__(timeout)
    # NOTE(review): presumably releases whatever session the base
    # __init__ created before we replace it — confirm against the base
    # class.
    self.close()

    proxy_addr = aiosocks.Socks5Addr(adress, port)
    proxy_auth = (aiosocks.Socks5Auth(login, password=password)
                  if login and password else None)

    connector = SocksConnector(proxy=proxy_addr, proxy_auth=proxy_auth,
                               loop=loop)
    self.session = aiohttp.ClientSession(
        connector=connector, response_class=CustomClientResponse)
async def get_session(self, *args, **kwargs):
    """Create an aiohttp ClientSession bound to a single Tor circuit.

    Keyword arguments consumed: ``circuit_id`` (explicit circuit to tag
    the session with), ``skip_auto_headers`` and ``headers``.  Raises
    IndexError when no circuit id is given and none are registered.
    """
    circuit_id = kwargs.pop('circuit_id', None)
    # BUG FIX: copy the caller-supplied containers before mutating —
    # the original appended to / assigned into the caller's own list
    # and dict, leaking a 'User-Agent' entry back to the caller.
    skip_auto = list(kwargs.pop('skip_auto_headers', []))
    headers = dict(kwargs.pop('headers', {}))

    if self.user_agent is not None and 'User-Agent' not in headers:
        skip_auto.append('User-Agent')
        headers['User-Agent'] = (self.user_agent()
                                 if callable(self.user_agent)
                                 else self.user_agent)

    socks_proxy = aiosocks.Socks5Addr('127.0.0.1', self.socks_port)
    # note very important limit parameter, thanks to this session can
    # be bound to particular circuit id
    connector = SocksConnector(proxy=socks_proxy, remote_resolve=False,
                               force_close=False, limit=1)
    session = aiohttp.ClientSession(connector=connector,
                                    skip_auto_headers=skip_auto,
                                    headers=headers)

    if circuit_id:
        session.circuit_id = circuit_id
    elif not self._circuit_ids and circuit_id is None:
        raise IndexError('No circuits in self.circuit_ids')
    else:
        session.circuit_id = random.choice(tuple(self._circuit_ids))

    # Bind self.get to this session so callers can use session.get
    # directly.
    session.get = functools.partial(self.get, session=session)
    return session
def cli(url, debug, force_update, conn_count, proxy):
    """Command-line entry point: configure logging, build the HTTP
    session (optionally through a SOCKS5 proxy), verify the proxy, then
    run the crawl."""
    click.echo('version: {}'.format(__version__))
    if debug:
        logging.root.setLevel(logging.DEBUG)
        asyncio.get_event_loop().set_debug(True)
    logging.info('conn number is %s', conn_count)
    logging.info('proxy is %s', proxy)
    logging.info('force-update is %s', force_update)
    logging.debug('settings: {}'.format(settings))

    connector_kwargs = dict(use_dns_cache=True, limit=conn_count,
                            conn_timeout=60)
    if proxy:
        connector = SocksConnector(
            aiosocks.Socks5Addr(proxy[0], proxy[1]), **connector_kwargs)
    else:
        connector = aiohttp.TCPConnector(**connector_kwargs)
    network.session = aiohttp.ClientSession(connector=connector)

    try:
        try:
            if proxy:
                logging.info('Test whether proxy config is correct')
                loop = asyncio.get_event_loop()
                loop.run_until_complete(verify_proxy())
        except (aiohttp.errors.ProxyConnectionError,
                ConnectionRefusedError, AssertionError) as e:
            print('Proxy config is wrong!\n {}'.format(e))
        else:
            # Only crawl when the proxy check did not fail.
            start(url, force_update)
    finally:
        network.session.close()

    if bundle_env:
        # Bundled executable: keep the console window open on exit.
        click.echo('\n')
        input('Press any key to exit')
def __init__(self,
             take_ownership=True,  # Tor dies when the Sorter does
             torrc_config={"ControlPort": "9051", "CookieAuth": "1"},
             socks_port=9050,
             page_load_timeout=20,
             max_tasks=10,
             db_handler=None):
    """Launch a tor process and build an aiohttp session that routes
    all requests through tor's SOCKS5 port.

    ``socks_port`` is only a starting hint: the actual port is chosen
    by ``find_free_port``.
    """
    self.logger = setup_logging(_log_dir, "sorter")
    self.db_handler = db_handler
    self.logger.info("Opening event loop for Sorter...")
    self.loop = asyncio.get_event_loop()
    self.max_tasks = max_tasks
    self.logger.info("Creating Sorter queue...")
    self.q = asyncio.Queue()

    # Start tor and create an aiohttp tor connector.
    # BUG FIX: copy the config — the original mutated the shared
    # mutable default dict, so one instance's SocksPort leaked into
    # every later instance (and into a caller-supplied dict).
    self.torrc_config = dict(torrc_config)
    self.socks_port = str(find_free_port(socks_port))
    self.torrc_config.update({"SocksPort": self.socks_port})
    self.logger.info("Starting tor process with config "
                     "{self.torrc_config}.".format(**locals()))
    self.tor_process = launch_tor_with_config(
        config=self.torrc_config, take_ownership=take_ownership)

    # BUG FIX: connect to the port tor actually listens on —
    # find_free_port may return a different port than the socks_port
    # argument, in which case the original connector pointed at the
    # wrong port.
    onion_proxy = aiosocks.Socks5Addr('127.0.0.1', int(self.socks_port))
    conn = SocksConnector(proxy=onion_proxy, remote_resolve=True)

    # aiohttp's ClientSession does connection pooling and HTTP
    # keep-alives for us.
    self.logger.info("Creating aiohttp ClientSession with our event loop "
                     "and tor proxy connector...")
    self.session = aiohttp.ClientSession(loop=self.loop, connector=conn)

    # Pretend we're Tor Browser in order to get rejected by less
    # sites/WAFs.
    u = "Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0"
    self.headers = {'user-agent': u}
    self.page_load_timeout = page_load_timeout
async def cn_proxy_spider(ip_all, ip_num, proxy=None):
    """Scrape proxy entries from cn-proxy.com through a SOCKS5 proxy.

    Appends dicts with 'ip', 'addr' and 'time' keys to ``ip_all``.
    NOTE(review): ``proxy`` is dereferenced unconditionally, so the
    ``None`` default would raise — confirm callers always pass a
    one-entry ``{host: port}`` mapping.  ``ip_num`` is unused here.
    """
    cn_proxy_url = "http://cn-proxy.com/"
    proxy_addr, proxy_port = list(proxy.items())[0]
    connector = SocksConnector(
        proxy=aiosocks.Socks5Addr(proxy_addr, proxy_port),
        proxy_auth=None, remote_resolve=True)

    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(cn_proxy_url) as resp:
            content = await resp.text()

    soup = BeautifulSoup(content, 'lxml')
    for tbody in soup.find_all('tbody'):
        for tr in tbody.find_all('tr'):
            cells = tr.find_all('td')
            ip_all.append({
                'ip': 'http://' + cells[0].string + ':' + cells[1].string,
                'addr': cells[2].string.split(" ")[0],
                'time': cells[-1].string
            })
def test_fingerprint_fail(self):
    """A certificate whose fingerprint differs from the expected one
    must make the request fail with FingerprintMismatch."""
    with fake_socks4_srv(self.loop) as proxy_port:
        proxy_addr = aiosocks.Socks4Addr('127.0.0.1', proxy_port)
        # Deliberately wrong fingerprint for the test server's cert.
        bad_fp = (b's\x93\xfd:\xed\x08\x1do\xa9\xaeq9'
                  b'\x1a\xe3\xc5\x7f\x89\xe7l\x10')
        connector = SocksConnector(proxy=proxy_addr, proxy_auth=None,
                                   loop=self.loop, remote_resolve=False,
                                   verify_ssl=False, fingerprint=bad_fp)

        with http_srv(self.loop, use_ssl=True) as url:
            with aiohttp.ClientSession(connector=connector,
                                       loop=self.loop) as session:
                @asyncio.coroutine
                def make_req():
                    return (yield from session.request('get', url=url))

                with self.assertRaises(aiohttp.FingerprintMismatch):
                    self.loop.run_until_complete(make_req())
def test_properties(self):
    """``proxy`` and ``proxy_auth`` must expose exactly the objects
    given at construction time."""
    proxy_addr = aiosocks.Socks4Addr('localhost')
    proxy_auth = aiosocks.Socks4Auth('login')
    connector = SocksConnector(proxy_addr, proxy_auth, loop=self.loop)
    self.assertIs(connector.proxy, proxy_addr)
    self.assertIs(connector.proxy_auth, proxy_auth)