class ProxyHealthCheck:
    """
    Check whether the proxies stored in the Redis database are working.

    On construction the proxies currently recorded as healthy and as
    unhealthy are fetched through ``RedisProxy``. ``is_bad_proxy`` probes a
    single proxy. Changing the stored status of a failing proxy is not done
    by the code in this class; presumably the caller does that -- verify
    against the call sites.
    """

    def __init__(self):
        """Create the ``RedisProxy`` handle and load both proxy collections."""
        self.instance = RedisProxy()
        self.healthy_proxies = self.instance.healthy_proxies()
        self.unhealthy_proxies = self.instance.unhealthy_proxies()

    def is_bad_proxy(self, proxy_dict, test_url='http://www.google.com',
                     timeout=10):
        """
        Probe one proxy by fetching ``test_url`` through it.

        :param proxy_dict: mapping read with ``.get("ip")`` and
            ``.get("port")``; the two values are joined as ``ip:port``.
        :param test_url: URL requested through the proxy.
        :param timeout: seconds to wait before giving up on the proxy, so an
            unresponsive proxy cannot block the check indefinitely.
        :return: ``False`` when the request succeeded, the HTTP status code
            when the request failed with ``urllib2.HTTPError``, and ``True``
            for any other failure (refused connection, timeout, ...).
        """
        address = "{ip}:{port_number}".format(
            ip=proxy_dict.get("ip"), port_number=proxy_dict.get("port"))
        print(address)
        try:
            proxy_handler = urllib2.ProxyHandler({'http': address})
            opener = urllib2.build_opener(proxy_handler)
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            # Use this opener directly instead of urllib2.install_opener():
            # installing it would route every later urllib2.urlopen() call in
            # the process through the proxy that is being tested.
            response = opener.open(urllib2.Request(test_url), timeout=timeout)
        except urllib2.HTTPError as e:
            print("Error code:  {0}".format(e.code))
            return e.code
        except Exception as detail:
            # Deliberately broad: any failure to get through means "bad".
            print("ERROR: {0}".format(detail))
            return True
        # Release the connection; only reachability matters, not the body.
        response.close()
        return False
def redis(self):
    """
    Store ``self.proxy_list`` in Redis with the status "unhealthy".

    The list is written through a fresh ``RedisProxy`` instance. Every proxy
    is stored as "unhealthy" because, at this point, nothing has verified
    that it works; per the original note, that check is left to
    ProxyHealthCheck.

    If storing fails with a ``StandardError``, the error is printed and not
    re-raised.
    """
    proxy_store = RedisProxy()
    initial_status = "unhealthy"
    try:
        proxy_store.store_proxy_list(self.proxy_list, initial_status)
    except StandardError as error:
        # Best effort: report the failure and carry on.
        print(error)
def __init__(self):
    """
    Create the ``RedisProxy`` handle and keep the results of its
    ``healthy_proxies()`` and ``unhealthy_proxies()`` calls on the instance.
    """
    proxy_store = RedisProxy()
    self.instance = proxy_store
    # Healthy proxies are fetched first, then the unhealthy ones.
    self.healthy_proxies = proxy_store.healthy_proxies()
    self.unhealthy_proxies = proxy_store.unhealthy_proxies()