def wieistmeineip(self):
    """Resolve the crawler's current external IPv4 address and location
    via http://www.wieistmeineip.de, routed through the TOR SOCKS proxy.

    Returns:
        dict with keys:
            "ipaddress" -- dotted-quad IPv4 string scraped from the page
            "country"   -- location text from the page's "location" div

    Raises:
        IndexError/AttributeError: if the page layout changed and the
        expected divs or IP pattern are not found.
    """
    result = {}
    # Save original socket class: setTorProxy() monkey-patches
    # socket.socket globally, so we must be able to undo it.
    originalSocket = socket.socket
    # Set TOR SOCKS proxy (replaces socket.socket process-wide)
    commonutils.setTorProxy()
    try:
        # Load and parse the page
        soup = self.parse("http://www.wieistmeineip.de")
        location = soup.findAll("div", {"class": "location"})[0]
        location = bs(location.text, convertEntities=bs.HTML_ENTITIES)
        ip = soup.findAll('div', id='ipv4')[0]
        raw_ip = bs(ip.text, convertEntities=bs.HTML_ENTITIES)
        # FIX: raw string for the regex -- '\.' in a plain string is an
        # invalid escape sequence (DeprecationWarning on Python >= 3.6).
        pattern = re.compile(r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')
        ip = re.search(pattern, raw_ip.text)
        result["ipaddress"] = ip.group(0)
        result["country"] = str(location)
    finally:
        # Removing SOCKS TOR proxy: restore the original socket class
        # even if parsing failed, so other code is not left proxied.
        socket.socket = originalSocket
    return result
def wieistmeineip(self):
    """Look up the external IPv4 address and country through TOR.

    Scrapes http://www.wieistmeineip.de while socket.socket is
    temporarily replaced by the TOR SOCKS proxy; the original socket
    implementation is restored in all cases.

    Returns:
        dict -- {"ipaddress": <dotted-quad string>, "country": <str>}
    """
    result = {}
    # Keep a reference to the real socket class so the TOR
    # monkey-patch below can be reverted in the finally block.
    originalSocket = socket.socket
    # Enable TOR SOCKS proxy (patches socket.socket globally)
    commonutils.setTorProxy()
    try:
        # Fetch and parse the page
        soup = self.parse("http://www.wieistmeineip.de")
        location = soup.findAll("div", {"class": "location"})[0]
        location = bs(location.text, convertEntities=bs.HTML_ENTITIES)
        ip = soup.findAll('div', id='ipv4')[0]
        raw_ip = bs(ip.text, convertEntities=bs.HTML_ENTITIES)
        # FIX: use a raw string -- '\.' is an invalid escape sequence
        # in a non-raw literal (DeprecationWarning on Python >= 3.6).
        pattern = re.compile(
            r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')
        ip = re.search(pattern, raw_ip.text)
        result["ipaddress"] = ip.group(0)
        result["country"] = str(location)
    finally:
        # Removing SOCKS TOR proxy: always restore the real socket
        socket.socket = originalSocket
    return result
def __process_url(self, url):
    """Download *url* through the TOR SOCKS proxy.

    Args:
        url: the URL to fetch.

    Returns:
        The raw response body (previously the download was read and
        discarded; returning it is backward compatible since the old
        implicit return value was None).

    Raises:
        urllib2.HTTPError, urllib2.URLError: propagated unchanged so
            callers can keep catching them.
        IOError: for any other failure while fetching the URL.
    """
    # Crawler config load
    cfgCrawler = Config(
        os.path.join(RAGPICKER_ROOT, 'config', 'crawler.conf')).get("clientConfig")
    data = None
    headers = {
        'User-Agent': cfgCrawler.get("browser_user_agent",
                                     "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"),
        'Accept-Language': cfgCrawler.get("browser_accept_language", "en-US"),
    }
    # Save original socket: setTorProxy() monkey-patches socket.socket
    originalSocket = socket.socket
    # Set TOR SOCKS proxy (replaces socket.socket process-wide)
    commonutils.setTorProxy()
    request = urllib2.Request(url, data, headers)
    try:
        url_dl = urllib2.urlopen(request, timeout=30).read()
    except (urllib2.HTTPError, urllib2.URLError):
        # FIX: bare `raise` re-raises with the original traceback;
        # the old `raise e` reset it (Python 2 semantics).
        raise
    # FIX: `except Exception, e` is legacy syntax, inconsistent with the
    # `as` form used above; `as e` works on Python 2.6+ and 3.
    except Exception as e:
        raise IOError("Thread(" + self.processName + ") - %s - Error parsing %s" % (e, url))
    finally:
        # BUG FIX: originalSocket was saved but never restored, leaving
        # the whole process proxied through TOR after this call. Restore
        # it unconditionally, mirroring wieistmeineip().
        socket.socket = originalSocket
    return url_dl
def __process_url(self, url):
    """Fetch *url* via the TOR SOCKS proxy and return the response body.

    Args:
        url: the URL to download.

    Returns:
        Raw bytes of the response (the original code read the body and
        dropped it; returning it is backward compatible because the old
        return value was an unusable implicit None).

    Raises:
        urllib2.HTTPError, urllib2.URLError: re-raised as-is.
        IOError: wraps any other download error.
    """
    # Crawler config load
    cfgCrawler = Config(os.path.join(RAGPICKER_ROOT, 'config',
                                     'crawler.conf')).get("clientConfig")
    data = None
    headers = {
        'User-Agent': cfgCrawler.get("browser_user_agent",
                                     "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"),
        'Accept-Language': cfgCrawler.get("browser_accept_language", "en-US"),
    }
    # Remember the real socket class before setTorProxy() patches it
    originalSocket = socket.socket
    # Set TOR SOCKS proxy (monkey-patches socket.socket globally)
    commonutils.setTorProxy()
    request = urllib2.Request(url, data, headers)
    try:
        url_dl = urllib2.urlopen(request, timeout=30).read()
    except (urllib2.HTTPError, urllib2.URLError):
        # FIX: bare `raise` keeps the original traceback intact,
        # unlike the previous `raise e` (Python 2 resets it).
        raise
    # FIX: unified to `as e` -- the old `except Exception, e` syntax was
    # inconsistent with the handlers above and invalid on Python 3.
    except Exception as e:
        raise IOError("Thread(" + self.processName + ") - %s - Error parsing %s" % (e, url))
    finally:
        # BUG FIX: the saved socket class was never restored, so every
        # later connection in the process stayed routed through TOR.
        socket.socket = originalSocket
    return url_dl