def check_proxy(proxy: str, should_be_valid=True):
    """Assert that ``proxy_validator`` judges *proxy* as expected.

    Args:
        proxy: Proxy string handed to ``proxy_validator.retrieve``.
        should_be_valid: Expected verdict. When truthy, ``retrieve`` must
            succeed; when falsy, it must raise ``ValidationError``.

    Raises:
        AssertionError: If the validator's verdict differs from the
            expectation.
    """
    try:
        proxy_validator.retrieve(proxy)
    except proxy_validator.ValidationError as ex:
        # Rejection is fine when that is exactly what the caller expected.
        if not should_be_valid:
            return
        raise AssertionError(
            "Proxy should be considered as valid. Message: {}".format(ex))
    else:
        # The validator accepted the proxy; complain if it should not have.
        if not should_be_valid:
            raise AssertionError("Proxy shouldn't be considered as valid")
async def process_raw_proxy(self, proxy, collector_id):
    """Validate a raw proxy string and enqueue it once per protocol.

    The string is parsed with ``proxy_validator.retrieve``; a string that
    fails validation is reported through ``self.collectors_logger`` and
    dropped. If a stored ``Proxy`` with the same auth data, domain and port
    exists and its ``last_check_time`` plus
    ``settings.PROXY_NOT_CHECKING_PERIOD`` has not passed yet, the proxy is
    skipped. Otherwise one new ``Proxy`` per index of ``Proxy.PROTOCOLS`` is
    built and handed to ``self.add_proxy_to_queue``.

    Args:
        proxy: Raw proxy string to process.
        collector_id: Identifier used in log messages and forwarded to
            ``self.add_proxy_to_queue``.
    """
    self.logger.debug("processing raw proxy \"{}\"".format(proxy))

    try:
        _protocol, auth_data, domain, port = proxy_validator.retrieve(proxy)
    except proxy_validator.ValidationError as ex:
        self.collectors_logger.error(
            "Collector with id \"{}\" returned bad raw proxy \"{}\". "
            "Message: {}".format(collector_id, proxy, ex)
        )
        return

    # The lookup matches on auth data, domain and port only:
    # don't care about protocol
    try:
        stored_proxy = await db.get(
            Proxy.select().where(
                Proxy.auth_data == auth_data,
                Proxy.domain == domain,
                Proxy.port == port,
            )
        )
    except Proxy.DoesNotExist:
        # Nothing stored for this address; fall through and enqueue it.
        pass
    else:
        skip_until = (
            stored_proxy.last_check_time + settings.PROXY_NOT_CHECKING_PERIOD
        )
        if skip_until >= time.time():
            credentials = auth_data + "@" if auth_data else ""
            short_address = credentials + "{}:{}".format(domain, port)
            self.logger.debug(
                "skipping proxy \"{}\" from collector \"{}\"".format(
                    short_address, collector_id)
            )
            return

    for raw_protocol, _ in enumerate(Proxy.PROTOCOLS):
        # Poll until the flag is set before queueing the next candidate.
        # TODO: find a better way
        while not self.good_proxies_are_processed:
            await asyncio.sleep(0.1)

        candidate = Proxy()
        candidate.raw_protocol = raw_protocol
        candidate.auth_data = auth_data
        candidate.domain = domain
        candidate.port = port

        await self.add_proxy_to_queue(candidate, collector_id)
async def process_proxy(proxy_url: str):
    """Check one proxy and print a colored cell describing the outcome.

    The URL is parsed with ``proxy_validator.retrieve``, then rebuilt and
    checked with ``proxy_utils.check_proxy`` once per entry of
    ``Proxy.PROTOCOLS`` until a check succeeds. Afterwards a single space
    with a background color is printed without a newline: red when no
    protocol worked, otherwise cyan, green, yellow or magenta when the last
    check took under 1 s, under 5 s, under 10 s, or 10 s and more.

    Args:
        proxy_url: Proxy string to validate and check.

    Raises:
        ValueError: If ``proxy_validator`` rejects *proxy_url*; the original
            ``ValidationError`` is attached as the cause.
    """
    async with proxies_semaphore:
        try:
            _, auth_data, domain, port = proxy_validator.retrieve(proxy_url)
        except proxy_validator.ValidationError as ex:
            raise ValueError(
                "Your collector returned bad proxy \"{}\". Message: \"{}\"".
                format(proxy_url, ex)) from ex

        # Everything after the scheme is identical for every protocol.
        credentials = auth_data + "@" if auth_data else ""
        address = credentials + domain + ":" + str(port)

        is_working = False
        # Pre-set so the code below stays well-defined even if
        # Proxy.PROTOCOLS is empty and the loop body never runs.
        response_time = 0.0
        for protocol in Proxy.PROTOCOLS:
            candidate_url = "{}://{}".format(protocol, address)

            # A monotonic clock is used because only the elapsed interval
            # matters, and it is unaffected by system clock adjustments.
            started_at = time.monotonic()
            check_result, _ = await proxy_utils.check_proxy(candidate_url)
            response_time = time.monotonic() - started_at

            if check_result:
                is_working = True
                break

        if not is_working:
            color = 'red'
        elif response_time < 1:
            color = 'cyan'
        elif response_time < 5:
            color = 'green'
        elif response_time < 10:
            color = 'yellow'
        else:
            color = 'magenta'

        print(colored(' ', on_color='on_' + color), end='')
        sys.stdout.flush()