def feedsStatus(self):
    from urllib import urlopen
    import socket
    self["tl_red"].hide()
    self["tl_yellow"].hide()
    self["tl_green"].hide()
    currentTimeoutDefault = socket.getdefaulttimeout()
    socket.setdefaulttimeout(3)
    try:
        d = urlopen("http://openvix.co.uk/TrafficLightState.php")
        self.trafficLight = d.read()
        if self.trafficLight == "unstable":
            self["tl_off"].hide()
            self["tl_red"].show()
        elif self.trafficLight == "updating":
            self["tl_off"].hide()
            self["tl_yellow"].show()
        elif self.trafficLight == "stable":
            self["tl_off"].hide()
            self["tl_green"].show()
        else:
            self.trafficLight = "unknown"
            self["tl_off"].show()
    except:
        self.trafficLight = "unknown"
        self["tl_off"].show()
    socket.setdefaulttimeout(currentTimeoutDefault)
def get_remote_applications_icon(self):
    logging.info("get remote applications icon")
    for n in self.notification:
        if not str(n['app_id']) in self.app_ids:
            self.app_ids.append(str(n['app_id']))
    ids_str = ", ".join(self.app_ids)
    qstr = "SELECT icon_url, app_id FROM application WHERE app_id IN (%s)" % ids_str
    apps = self._query(qstr)
    default_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(GET_ICON_TIMEOUT)
    logging.debug("socket timeout: %s" % socket.getdefaulttimeout())
    timeout_count = 0
    for app in apps:
        if timeout_count < 3:
            try:
                icon_name = self.get_remote_icon(app['icon_url'], self.app_icons_dir)
            except TimeoutError:
                logging.debug("timeout")
                timeout_count += 1
                icon_name = ""
            except NoUpdateError:
                logging.debug("No need update")
                icon_name = os.path.basename(urlparse.urlsplit(app['icon_url']).path)
        else:
            icon_name = ""
        self.applications[app['app_id']] = {'icon_name': icon_name}
    socket.setdefaulttimeout(default_timeout)
    self.refresh_status["apps_icon"] = True
def __init__(self, family_or_realsock=socket.AF_INET, *args, **kwargs):
    should_set_nonblocking = kwargs.pop('set_nonblocking', True)
    if isinstance(family_or_realsock, (int, long)):
        fd = _original_socket(family_or_realsock, *args, **kwargs)
    else:
        fd = family_or_realsock

    # import timeout from other socket, if it was there
    try:
        self._timeout = fd.gettimeout() or socket.getdefaulttimeout()
    except AttributeError:
        self._timeout = socket.getdefaulttimeout()

    if should_set_nonblocking:
        set_nonblocking(fd)
    self.fd = fd
    # when client calls setblocking(0) or settimeout(0) the socket must
    # act non-blocking
    self.act_non_blocking = False

    # Copy some attributes from underlying real socket.
    # This is the easiest way that i found to fix
    # https://bitbucket.org/eventlet/eventlet/issue/136
    # Only `getsockopt` is required to fix that issue, others
    # are just premature optimization to save __getattr__ call.
    self.bind = fd.bind
    self.close = fd.close
    self.fileno = fd.fileno
    self.getsockname = fd.getsockname
    self.getsockopt = fd.getsockopt
    self.listen = fd.listen
    self.setsockopt = fd.setsockopt
    self.shutdown = fd.shutdown
def test_update_feeds(hacks_feed, mocked_parse):
    """update_feeds adds new entries, resets timeout."""
    assert socket.getdefaulttimeout() is None
    count = update_feeds()
    assert count == 2
    assert Entry.objects.count() == 2
    assert socket.getdefaulttimeout() is None
def testTimeoutAttribute(self):
    # This will prove that the timeout gets through HTTPConnection
    # and into the socket.

    # default -- use global socket timeout
    self.assertIsNone(socket.getdefaulttimeout())
    socket.setdefaulttimeout(30)
    try:
        httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT)
        httpConn.connect()
    finally:
        socket.setdefaulttimeout(None)
    self.assertEqual(httpConn.sock.gettimeout(), 30)
    httpConn.close()

    # no timeout -- do not use global socket default
    self.assertIsNone(socket.getdefaulttimeout())
    socket.setdefaulttimeout(30)
    try:
        httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT, timeout=None)
        httpConn.connect()
    finally:
        socket.setdefaulttimeout(None)
    self.assertEqual(httpConn.sock.gettimeout(), None)
    httpConn.close()

    # a value
    httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT, timeout=30)
    httpConn.connect()
    self.assertEqual(httpConn.sock.gettimeout(), 30)
    httpConn.close()
def get_node(cls, external_manager, proxy_class, hostname=None, ssh_port=22,
             username='******', password=None, isRemote=False, use_keys=False):
    key = cls.get_key(proxy_class, hostname, ssh_port, username, password,
                      isRemote, use_keys)
    if cls.nodes.get(key) is not None:
        return cls.nodes[key]
    try:
        import time
        import datetime
        start = datetime.datetime.now()
        print 'NODE_PROXY : START ', proxy_class, hostname
        print socket.getdefaulttimeout()
        print start
        node = NodeWrapper(external_manager, proxy_class, hostname, ssh_port,
                           username, password, isRemote, use_keys)
    finally:
        now = datetime.datetime.now()
        print 'NODE_PROXY : END ', hostname, socket.getdefaulttimeout()
        print (now - start).seconds
    cls._node_pool_lock.acquire()
    try:
        if cls.nodes.get(key) is None:
            cls.nodes[key] = node
            print 'Adding to NodePool'
        else:
            node.cleanup()
        return cls.nodes[key]
    finally:
        cls._node_pool_lock.release()
def testDefaultTimeout(self):
    # Testing default timeout
    # The default timeout should initially be None
    self.assertEqual(socket.getdefaulttimeout(), None)
    s = socket.socket()
    self.assertEqual(s.gettimeout(), None)
    s.close()

    # Set the default timeout to 10, and see if it propagates
    socket.setdefaulttimeout(10)
    self.assertEqual(socket.getdefaulttimeout(), 10)
    s = socket.socket()
    self.assertEqual(s.gettimeout(), 10)
    s.close()

    # Reset the default timeout to None, and see if it propagates
    socket.setdefaulttimeout(None)
    self.assertEqual(socket.getdefaulttimeout(), None)
    s = socket.socket()
    self.assertEqual(s.gettimeout(), None)
    s.close()

    # Check that setting it to an invalid value raises ValueError
    self.assertRaises(ValueError, socket.setdefaulttimeout, -1)

    # Check that setting it to an invalid type raises TypeError
    self.assertRaises(TypeError, socket.setdefaulttimeout, "spam")
def test_update_feeds_resets_timeout_on_exception(mock_update, hacks_feed):
    """update_feeds resets the socket timeout even on an exception."""
    assert socket.getdefaulttimeout() is None
    mock_update.side_effect = Exception('Failure')
    with pytest.raises(Exception):
        update_feeds()
    assert socket.getdefaulttimeout() is None
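# The two tests above only make sense if update_feeds() installs a global
# socket timeout for the duration of the fetch and restores it afterwards,
# even when parsing fails. A minimal sketch of such a function follows;
# the Feed/Entry models, field names and the 20-second value are assumptions
# for illustration, not taken from the source.
import socket

import feedparser


def update_feeds(timeout=20):
    """Fetch all feeds and store new entries, guarding the global timeout."""
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        added = 0
        for feed in Feed.objects.all():
            parsed = feedparser.parse(feed.url)
            for entry in parsed.entries:
                _, created = Entry.objects.get_or_create(
                    feed=feed, link=entry.link,
                    defaults={'title': entry.title})
                added += created
        return added
    finally:
        # Always restore the previous default -- exactly what the tests check.
        socket.setdefaulttimeout(old_timeout)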
def fetch(self, server):
    """
    This function gets your IP from a specific server
    """
    t = None
    socket_default_timeout = socket.getdefaulttimeout()
    opener = urllib.build_opener()
    opener.addheaders = [('User-agent',
                          "Mozilla/5.0 (X11; Linux x86_64; rv:24.0)"
                          " Gecko/20100101 Firefox/24.0")]

    try:
        # Close url resource if fetching not finished within timeout.
        t = Timer(self.timeout, self.handle_timeout, [self.url])
        t.start()

        # Open URL.
        if version_info[0:2] == (2, 5):
            # Support for Python 2.5.* using socket hack
            # (Changes global socket timeout.)
            socket.setdefaulttimeout(self.timeout)
            self.url = opener.open(server)
        else:
            self.url = opener.open(server, timeout=self.timeout)

        # Read response.
        content = self.url.read()

        # Didn't want to import chardet. Preferred to stick to stdlib
        if PY3K:
            try:
                content = content.decode('UTF-8')
            except UnicodeDecodeError:
                content = content.decode('ISO-8859-1')

        p = '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.('
        p += '25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|['
        p += '01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
        m = re.search(p, content)
        myip = m.group(0)
        if len(myip) > 0:
            return myip
        else:
            return ''
    except Exception as e:
        print(e)
        return ''
    finally:
        if self.url is not None:
            self.url.close()
            self.url = None
        if t is not None:
            t.cancel()

        # Reset default socket timeout.
        if socket.getdefaulttimeout() != socket_default_timeout:
            socket.setdefaulttimeout(socket_default_timeout)
def _socket_create_connection(address, timeout=None):
    if timeout is None:
        timeout = socket.getdefaulttimeout()
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    oldtimeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    sock.connect(address)
    socket.setdefaulttimeout(oldtimeout)
    return sock
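# The helper above swaps the process-wide default around connect(), which is
# not thread-safe, and since the default timeout is only consulted when a
# socket object is created, changing it after socket() may not affect this
# socket at all. A per-socket sketch that sidesteps the global (similar in
# spirit to the stdlib's socket.create_connection) -- an alternative, not the
# original author's code:
import socket


def create_connection_with_timeout(address, timeout=None):
    if timeout is None:
        timeout = socket.getdefaulttimeout()
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(timeout)  # only this socket is affected
    try:
        sock.connect(address)
    except socket.error:
        sock.close()
        raise
    return sock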
def test_timeout_reset_after_call(self):
    old_timeout = socket.getdefaulttimeout()
    self.stub_urlopen_with_timeout_check(30, None, "url")
    try:
        socket.setdefaulttimeout(1234)
        base_utils.urlopen("url", timeout=30)
        self.assertEquals(1234, socket.getdefaulttimeout())
    finally:
        socket.setdefaulttimeout(old_timeout)
def getTimeout(self):
    """
    Return the timeout set for this session. The default timeout should
    always be set, but the if..else guard covers the case where it is not.
    """
    if socket.getdefaulttimeout() is not None:
        return socket.getdefaulttimeout()
    else:
        return 'No timeout set'
def testSocketTimeout(self):
    dict = {'logger': self.logger,
            'endpoint': 'https://github.com/dmwm',
            'cacheduration': None,
            'timeout': 10,
            }
    service = Service(dict)
    deftimeout = socket.getdefaulttimeout()
    service.getData('%s/socketresettest' % self.testDir,
                    '/WMCore/blob/master/setup.py#L11')
    assert deftimeout == socket.getdefaulttimeout()
def testPing(self):
    import socket
    oldTimeOut = socket.getdefaulttimeout()
    from angel_app.resource.remote.clone import Clone
    cc = Clone("80.219.195.84", 6221)
    assert False == cc.ping()
    dd = Clone("localhost")
    assert True == dd.ping(), "Make sure you have a local provider instance running."
    assert oldTimeOut == socket.getdefaulttimeout()
def testSocketTimeout(self):
    dict = {'logger': self.logger,
            'endpoint': 'http://cmssw.cvs.cern.ch/',
            'cacheduration': None,
            'timeout': 10,
            #'cachepath' : self.cache_path,
            #'req_cache_path': '%s/requests' % self.cache_path
            }
    service = Service(dict)
    deftimeout = socket.getdefaulttimeout()
    service.getData('%s/socketresettest' % self.testDir, '/cgi-bin/cmssw.cgi')
    assert deftimeout == socket.getdefaulttimeout()
def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0,
             _sock=None, _hub=None):
    """
    Initialize the UV socket

    :param family_or_realsock: a socket descriptor or a socket family
    """
    self.uv_fd = None
    self.uv_handle = None
    self.uv_hub = None
    self.uv_recv_string = ''    # buffer for receiving data...

    if isinstance(family, (int, long)):
        self.uv_fd = _original_socket(family, type, proto, _sock)
    elif isinstance(family, GreenSocket):
        _sock = family
        self.uv_fd = _sock.uv_fd
        if hasattr(_sock, 'uv_hub') and _sock.uv_hub:
            _hub = _sock.uv_hub
    else:
        _sock = family
        self.uv_fd = _sock

    if not self.uv_hub:
        if _hub:
            self.uv_hub = _hub
        else:
            self.uv_hub = weakref.proxy(get_hub())

    ## check if the socket type is supported by pyUV and we can create a pyUV socket...
    if not self.uv_handle:
        if self.type == socket.SOCK_STREAM:
            self.uv_handle = pyuv.TCP(self.uv_hub.uv_loop)
            self.uv_handle.open(self.fileno())
        elif self.type == socket.SOCK_DGRAM:
            self.uv_handle = pyuv.UDP(self.uv_hub.uv_loop)
            self.uv_handle.open(self.fileno())

    # import timeout from other socket, if it was there
    try:
        self._timeout = self.uv_fd.gettimeout() or socket.getdefaulttimeout()
    except AttributeError:
        self._timeout = socket.getdefaulttimeout()

    assert self.uv_fd, 'the socket descriptor must be not null'

    set_nonblocking(self.uv_fd)

    # when client calls setblocking(0) or settimeout(0) the socket must act non-blocking
    self.act_non_blocking = False
def retry_http(tries, backoff=2, on_failure='error'):
    """
    Retry a function or method reading from the internet until no socket
    or IOError is raised

    delay sets the initial delay, and backoff sets how much the delay should
    lengthen after each failure. backoff must be greater than 1, or else it
    isn't really a backoff. tries must be at least 0, and delay greater than 0.
    """
    delay = socket.getdefaulttimeout()
    o_delay = socket.getdefaulttimeout()
    if backoff <= 1:
        raise ValueError("backoff must be greater than 1")
    tries = math.floor(tries)
    if tries < 0:
        raise ValueError("tries must be 0 or greater")
    if delay <= 0:
        delay = 15.
        o_delay = 15.
        socket.setdefaulttimeout(delay)
        #raise ValueError("delay must be greater than 0")

    def deco_retry(f):
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay  # make mutable
            while mtries > 0:
                try:
                    rv = f(*args, **kwargs)  # Try again
                except IOError, msg:
                    rv = False
                except socket.error:
                    rv = False
                if rv != False:  # Done on success
                    return rv
                mtries -= 1  # consume an attempt
                socket.setdefaulttimeout(mdelay)  # wait...
                mdelay *= backoff  # make future wait longer
                logger.error("URL timeout: %d attempts remaining (delay=%.1fs)" % (mtries, mdelay))
            logger.critical("URL timeout: number of trials exceeded")
            if on_failure == 'error':
                raise IOError, msg  # Ran out of tries :-(
            else:
                logger.critical("URL Failed, but continuing...")
                return None
def checkTraficLight(self):
    self.activityTimer.callback.remove(self.checkTraficLight)
    self.activityTimer.start(100, False)
    currentTimeoutDefault = socket.getdefaulttimeout()
    socket.setdefaulttimeout(3)
    message = ""
    picon = None
    default = True
    try:
        # TODO: Use Twisted's URL fetcher, urlopen is evil. And it can
        # run in parallel to the package update.
        status = urlopen("http://openpli.org/status/").read().split('!', 1)
        if getBoxType() in status[0].split(','):
            message = len(status) > 1 and status[1] or _("The current beta image might not be stable.\nFor more information see %s.") % ("www.openpli.org")
            picon = MessageBox.TYPE_ERROR
            default = False
    except:
        message = _("The status of the current beta image could not be checked because %s can not be reached.") % ("www.openpli.org")
        picon = MessageBox.TYPE_ERROR
        default = False
    socket.setdefaulttimeout(currentTimeoutDefault)
    if default:
        self.showDisclaimer()
    else:
        message += "\n" + _("Do you want to update your receiver?")
        self.session.openWithCallback(self.startActualUpdate, MessageBox, message, default=default, picon=picon)
def test_http_basic(self):
    self.assertTrue(socket.getdefaulttimeout() is None)
    url = "http://www.python.org"
    with support.transient_internet(url, timeout=None):
        u = _urlopen_with_retry(url)
        self.addCleanup(u.close)
        self.assertTrue(u.fp.raw._sock.gettimeout() is None)
def get_tags():
    socket_to = None
    try:
        socket_to = socket.getdefaulttimeout()
        socket.setdefaulttimeout(EC2.TIMEOUT)
    except Exception:
        pass

    try:
        iam_role = urllib2.urlopen(EC2.URL + "/iam/security-credentials").read().strip()
        iam_params = json.loads(urllib2.urlopen(EC2.URL + "/iam/security-credentials" + "/" + unicode(iam_role)).read().strip())
        from checks.libs.boto.ec2.connection import EC2Connection
        connection = EC2Connection(aws_access_key_id=iam_params['AccessKeyId'],
                                   aws_secret_access_key=iam_params['SecretAccessKey'],
                                   security_token=iam_params['Token'])
        instance_object = connection.get_only_instances([EC2.metadata['instance-id']])[0]
        EC2_tags = [u"%s:%s" % (tag_key, tag_value) for tag_key, tag_value in instance_object.tags.iteritems()]
    except Exception:
        log.exception("Problem retrieving custom EC2 tags")
        EC2_tags = []

    try:
        if socket_to is None:
            socket_to = 3
        socket.setdefaulttimeout(socket_to)
    except Exception:
        pass

    return EC2_tags
def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=None):
    """Open url, optionally retrying if an error is encountered.

    Socket and other IO errors will always be retried if retries > 0.
    HTTP errors are retried if the error code is passed in ``codes``.

    :param retries: Number of times to retry.
    :param codes: HTTP error codes that should be retried.
    """
    attempts = 0
    while True:
        try:
            return urllib2.urlopen(url, timeout=timeout)
        except IOError as e:
            no_retry = isinstance(e, urllib2.HTTPError) and e.code not in codes
            if attempts < retries and not no_retry:
                attempts += 1
                continue
            else:
                try:
                    url_string = url.get_full_url()  # if url is Request obj
                except Exception:
                    url_string = url
                if timeout is None:
                    timeout = socket.getdefaulttimeout()
                log.exception(
                    'Failed after %s retries on url with a timeout of %s: %s: %s',
                    attempts, timeout, url_string, e)
                raise e
def myproxy(url):
    req = urllib2.Request(url)

    try:
        # Important or if the remote server is slow
        # all our web server threads get stuck here
        # But this is UGLY as Python does not provide per-thread
        # or per-socket timeouts thru urllib
        orignal_timeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(60)
            response = urllib2.urlopen(req)
        finally:
            # restore original timeout
            socket.setdefaulttimeout(orignal_timeout)

        # XXX: How to stream response through Zope
        # AFAIK - we cannot do it currently
        return response.read()
    except HTTPError, e:
        # Have something more useful to log output as plain urllib exception
        # using Python logging interface
        # http://docs.python.org/library/logging.html
        logger.error("Server did not return HTTP 200 when calling remote proxy URL:" + url)
        for key, value in params.items():
            logger.error(key + ": " + value)

        # Print the server-side stack trace / error page
        logger.error(e.read())

        raise e
def _call_api(self, api_url, err_env):
    """urlopen(), plus error handling and possible retries.

    err_env is a dict of additional info passed to the error handler
    """
    while True:  # may retry on error
        api_request = urllib2.Request(
            api_url, headers={"Accept-Encoding": "gzip"})
        log.debug("Amazon URL: %s" % api_url)
        try:
            if self.Timeout and sys.version[:3] in ["2.4", "2.5"]:
                # urllib2.urlopen() doesn't accept timeout until 2.6
                old_timeout = socket.getdefaulttimeout()
                try:
                    socket.setdefaulttimeout(self.Timeout)
                    return urllib2.urlopen(api_request)
                finally:
                    socket.setdefaulttimeout(old_timeout)
            else:
                # the simple way
                return urllib2.urlopen(api_request, timeout=self.Timeout)
        except:
            if not self.ErrorHandler:
                raise
            exception = sys.exc_info()[1]  # works in Python 2 and 3
            err = {'exception': exception}
            err.update(err_env)
            if not self.ErrorHandler(err):
                raise
def _get_metadata(agentConfig):
    if GCE.metadata is not None:
        return GCE.metadata

    if not agentConfig['collect_instance_metadata']:
        log.info("Instance metadata collection is disabled. Not collecting it.")
        GCE.metadata = {}
        return GCE.metadata

    socket_to = None
    try:
        socket_to = socket.getdefaulttimeout()
        socket.setdefaulttimeout(GCE.TIMEOUT)
    except Exception:
        pass

    try:
        opener = urllib2.build_opener()
        opener.addheaders = [('X-Google-Metadata-Request', 'True')]
        GCE.metadata = json.loads(opener.open(GCE.URL).read().strip())
    except Exception:
        GCE.metadata = {}

    try:
        if socket_to is None:
            socket_to = 3
        socket.setdefaulttimeout(socket_to)
    except Exception:
        pass

    return GCE.metadata
def _socket_timeout(*args, **kwargs):
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return func(*args, **kwargs)
    finally:
        socket.setdefaulttimeout(old_timeout)
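# _socket_timeout above references func and timeout from an enclosing scope,
# so it is presumably the inner function of a decorator. The outer function
# is not shown in the source; the sketch below is one plausible way it could
# be assembled (the name with_socket_timeout is an assumption):
import functools
import socket


def with_socket_timeout(timeout):
    """Decorator factory: run the wrapped call under a temporary default timeout."""
    def decorator(func):
        @functools.wraps(func)
        def _socket_timeout(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)
        return _socket_timeout
    return decorator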
def __init__(self, family=_socket.AF_INET, type=_socket.SOCK_STREAM,
             proto=0, _sock=None):
    if _sock is None:
        _sock = _socket_socket(family, type, proto)
    self.__socket = _sock
    self.__socket.setblocking(0)
    self.__timeout = _socket.getdefaulttimeout()
def socket_timeout(timeout):
    """Context manager to temporarily set the default socket timeout."""
    old = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        yield
    finally:
        socket.setdefaulttimeout(old)
def __service(self, url, params=None, timeout=50):
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        # POST
        if params:
            self.logger.debug('post %s params[%s]' % (url, params))
            request = urllib2.Request(url, urllib.urlencode(params))
        # GET
        else:
            self.logger.debug('get %s params[%s]' % (url, params))
            request = urllib2.Request(url)
        request.add_header('Accept-Language', 'zh-cn')
        response = urllib2.urlopen(request)
        content = response.read()
        response.close()
        self.logger.debug('content->%s, code->%d' % (content, response.code))
        if response.code == 200:
            return content, True
        return content, False
    except Exception as ex:
        return str(ex), False
    finally:
        socket.setdefaulttimeout(old_timeout)
def gdalurlopen(url):
    timeout = 10
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)

    if 'GDAL_HTTP_PROXY' in os.environ:
        proxy = os.environ['GDAL_HTTP_PROXY']

        if 'GDAL_HTTP_PROXYUSERPWD' in os.environ:
            proxyuserpwd = os.environ['GDAL_HTTP_PROXYUSERPWD']
            proxyHandler = urllib2.ProxyHandler(
                {"http": "http://%s@%s" % (proxyuserpwd, proxy)})
        else:
            proxyuserpwd = None
            proxyHandler = urllib2.ProxyHandler(
                {"http": "http://%s" % (proxy)})

        opener = urllib2.build_opener(proxyHandler, urllib2.HTTPHandler)
        urllib2.install_opener(opener)

    try:
        handle = urllib2.urlopen(url)
        socket.setdefaulttimeout(old_timeout)
        return handle
    except urllib2.HTTPError, e:
        print('HTTP service for %s is down (HTTP Error: %d)' % (url, e.code))
        socket.setdefaulttimeout(old_timeout)
        return None
def execute(self):
    if self.ctrl_file_data:
        uploading_kernel = 'kernel' in self.ctrl_file_data
        if uploading_kernel:
            default_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(topic_common.UPLOAD_SOCKET_TIMEOUT)
            print 'Uploading Kernel: this may take a while...',
            sys.stdout.flush()
        try:
            cf_info = self.execute_rpc(op='generate_control_file',
                                       item=self.jobname,
                                       **self.ctrl_file_data)
        finally:
            if uploading_kernel:
                socket.setdefaulttimeout(default_timeout)
        if uploading_kernel:
            print 'Done'
        self.data['control_file'] = cf_info['control_file']
        if 'synch_count' not in self.data:
            self.data['synch_count'] = cf_info['synch_count']
        if cf_info['is_server']:
            self.data['control_type'] = 'Server'
        else:
            self.data['control_type'] = 'Client'

        # Get the union of the 2 sets of dependencies
        deps = set(self.data['dependencies'])
        deps = sorted(deps.union(cf_info['dependencies']))
        self.data['dependencies'] = list(deps)

    if 'synch_count' not in self.data and 'hostless' not in self.data:
        self.data['synch_count'] = 1

    return self.create_job()
def urlopener(url_or_request, log, **kwargs): """ Utility function for pulling back a url, with a retry of 3 times, increasing the timeout, etc. Re-raises any errors as URLError. .. warning:: This is being replaced by requests library. flexget.utils.requests should be used going forward. :param str url_or_request: URL or Request object to get. :param log: Logger to log debug info and errors to :param kwargs: Keyword arguments to be passed to urlopen :return: The file-like object returned by urlopen """ from flexget.utils.requests import is_unresponsive, set_unresponsive if isinstance(url_or_request, urllib2.Request): url = url_or_request.get_host() else: url = url_or_request if is_unresponsive(url): msg = '%s is known to be unresponsive, not trying again.' % urlparse(url).hostname log.warning(msg) raise urllib2.URLError(msg) retries = kwargs.get('retries', 3) timeout = kwargs.get('timeout', 15.0) # get the old timeout for sockets, so we can set it back to that when done. This is NOT threadsafe by the way. # In order to avoid requiring python 2.6, we're not using the urlopen timeout parameter. That really should be used # after checking for python 2.6. oldtimeout = socket.getdefaulttimeout() try: socket.setdefaulttimeout(timeout) handlers = [SmartRedirectHandler()] if urllib2._opener: handlers.extend(urllib2._opener.handlers) if kwargs.get('handlers'): handlers.extend(kwargs['handlers']) if len(handlers) > 1: handler_names = [h.__class__.__name__ for h in handlers] log.debug('Additional handlers have been specified for this urlopen: %s' % ', '.join(handler_names)) opener = urllib2.build_opener(*handlers).open for i in range(retries): # retry getting the url up to 3 times. if i > 0: time.sleep(3) try: retrieved = opener(url_or_request, kwargs.get('data')) except urllib2.HTTPError as e: if e.code < 500: # If it was not a server error, don't keep retrying. log.warning('Could not retrieve url (HTTP %s error): %s' % (e.code, e.url)) raise log.debug('HTTP error (try %i/%i): %s' % (i + 1, retries, e.code)) except (urllib2.URLError, socket.timeout) as e: if hasattr(e, 'reason'): reason = str(e.reason) else: reason = 'N/A' if reason == 'timed out': set_unresponsive(url) log.debug('Failed to retrieve url (try %i/%i): %s' % (i + 1, retries, reason)) except httplib.IncompleteRead as e: log.critical('Incomplete read - see python bug 6312') break else: # make the returned instance usable in a with statement by adding __enter__ and __exit__ methods def enter(self): return self def exit(self, exc_type, exc_val, exc_tb): self.close() retrieved.__class__.__enter__ = enter retrieved.__class__.__exit__ = exit return retrieved log.warning('Could not retrieve url: %s' % url_or_request) raise urllib2.URLError('Could not retrieve url after %s tries.' % retries) finally: socket.setdefaulttimeout(oldtimeout)
import socket
import urllib2
import threading
import sys
import Queue

socket.setdefaulttimeout(7)
print("Bobng's proxy checker. Using %s second timeout" % (socket.getdefaulttimeout()))

#input_file = sys.argv[1]
#proxy_type = sys.argv[2] #options: http,s4,s5
#output_file = sys.argv[3]

input_file = 'proxylist.txt'
proxy_type = 'http'
output_file = 'proxy_alive.txt'

url = "www.seemyip.com"  # Don't put http:// in here, or any /'s

check_queue = Queue.Queue()
output_queue = Queue.Queue()
threads = 20


def writer(f, rq):
    while True:
        line = rq.get()
        f.write(line + '\n')


def checker(q, oq):
    while True:
def downloadpage(url, post=None, headers=None, timeout=None, follow_redirects=True, cookies=True, replace_headers=False, add_referer=False, only_headers=False, bypass_cloudflare=True, count_retries=0, count_retries_tot=5, random_headers=False, ignore_response_code=False, alfa_s=False, proxy=True, proxy_web=False, proxy_addr_forced=None,forced_proxy=None, proxy_retries=1): """ Abre una url y retorna los datos obtenidos @param url: url que abrir. @type url: str @param post: Si contiene algun valor este es enviado mediante POST. @type post: str @param headers: Headers para la petición, si no contiene nada se usara los headers por defecto. @type headers: dict, list @param timeout: Timeout para la petición. @type timeout: int @param follow_redirects: Indica si se han de seguir las redirecciones. @type follow_redirects: bool @param cookies: Indica si se han de usar las cookies. @type cookies: bool @param replace_headers: Si True, los headers pasados por el parametro "headers" sustituiran por completo los headers por defecto. Si False, los headers pasados por el parametro "headers" modificaran los headers por defecto. @type replace_headers: bool @param add_referer: Indica si se ha de añadir el header "Referer" usando el dominio de la url como valor. @type add_referer: bool @param only_headers: Si True, solo se descargarán los headers, omitiendo el contenido de la url. @type only_headers: bool @param random_headers: Si True, utiliza el método de seleccionar headers aleatorios. @type random_headers: bool @param ignore_response_code: Si es True, ignora el método para WebErrorException para error como el error 404 en veseriesonline, pero es un data funcional @type ignore_response_code: bool @return: Resultado de la petición @rtype: HTTPResponse Parametro Tipo Descripción ---------------------------------------------------------------------------------------------------------------- HTTPResponse.sucess: bool True: Peticion realizada correctamente | False: Error al realizar la petición HTTPResponse.code: int Código de respuesta del servidor o código de error en caso de producirse un error HTTPResponse.error: str Descripción del error en caso de producirse un error HTTPResponse.headers: dict Diccionario con los headers de respuesta del servidor HTTPResponse.data: str Respuesta obtenida del servidor HTTPResponse.time: float Tiempo empleado para realizar la petición """ response = {} # Headers por defecto, si no se especifica nada request_headers = default_headers.copy() # Headers pasados como parametros if headers is not None: if not replace_headers: request_headers.update(dict(headers)) else: request_headers = dict(headers) if add_referer: request_headers["Referer"] = "/".join(url.split("/")[:3]) if random_headers or HTTPTOOLS_DEFAULT_RANDOM_HEADERS: request_headers['User-Agent'] = random_useragent() url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]") #Si la descarga requiere que se haga a través de un servicio Proxy o ProxyWeb, se prepara la url proxy_retries_counter = 0 url_save = url post_save = post while proxy_retries_counter <= proxy_retries: # Handlers init handlers = [urllib2.HTTPHandler(debuglevel=False)] proxy_retries_counter += 1 proxy_stat = '' proxy_addr = '' proxy_CF_addr = '' proxy_web_name = '' proxy_log = '' try: if (proxy or proxy_web) and (forced_proxy or proxy_addr_forced or channel_proxy_list(url, forced_proxy=forced_proxy)): import proxytools proxy_addr, proxy_CF_addr, proxy_web_name, proxy_log = proxytools.get_proxy_addr(url, post=post, forced_proxy=forced_proxy) if 
proxy_addr_forced and proxy_log: import scrapertools proxy_log = scrapertools.find_single_match(str(proxy_addr_forced), "{'http.*':\s*'(.*?)'}") if proxy and proxy_addr: if proxy_addr_forced: proxy_addr = proxy_addr_forced handlers.append(urllib2.ProxyHandler(proxy_addr)) proxy_stat = ', Proxy Direct ' + proxy_log elif proxy and proxy_CF_addr: if proxy_addr_forced: proxy_CF_addr = proxy_addr_forced handlers.append(urllib2.ProxyHandler(proxy_CF_addr)) proxy_stat = ', Proxy CF ' + proxy_log elif proxy and proxy_addr_forced: proxy_addr = proxy_addr_forced handlers.append(urllib2.ProxyHandler(proxy_addr)) proxy_stat = ', Proxy Direct ' + proxy_log elif proxy and not proxy_addr and not proxy_CF_addr and not proxy_addr_forced: proxy = False if not proxy_web_name: proxy_addr, proxy_CF_addr, proxy_web_name, proxy_log = proxytools.get_proxy_addr(url, forced_proxy='Total') if proxy_web_name: proxy_web = True else: proxy_web = False if proxy_addr: proxy = True handlers.append(urllib2.ProxyHandler(proxy_addr)) proxy_stat = ', Proxy Direct ' + proxy_log if proxy_web and proxy_web_name: if post: proxy_log = '(POST) ' + proxy_log url, post, headers_proxy, proxy_web_name = proxytools.set_proxy_web(url, proxy_web_name, post=post) if proxy_web_name: proxy_stat = ', Proxy Web ' + proxy_log if headers_proxy: request_headers.update(dict(headers_proxy)) if proxy_web and not proxy_web_name: proxy_web = False proxy_addr, proxy_CF_addr, proxy_web_name, proxy_log = proxytools.get_proxy_addr(url, forced_proxy='Total') if proxy_CF_addr: proxy = True handlers.append(urllib2.ProxyHandler(proxy_CF_addr)) proxy_stat = ', Proxy CF ' + proxy_log elif proxy_addr: proxy = True handlers.append(urllib2.ProxyHandler(proxy_addr)) proxy_stat = ', Proxy Direct ' + proxy_log except: import traceback logger.error(traceback.format_exc()) proxy = '' proxy_web = '' proxy_stat = '' proxy_addr = '' proxy_CF_addr = '' proxy_web_name = '' proxy_log = '' url = url_save # Limitar tiempo de descarga si no se ha pasado timeout y hay un valor establecido en la variable global if timeout is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None: timeout = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT if timeout == 0: timeout = None if not alfa_s: logger.info("----------------------------------------------") logger.info("downloadpage Alfa: %s" %__version) logger.info("----------------------------------------------") logger.info("Timeout: %s" % timeout) logger.info("URL: " + url) logger.info("Dominio: " + urlparse.urlparse(url)[1]) if post: logger.info("Peticion: POST" + proxy_stat) else: logger.info("Peticion: GET" + proxy_stat) logger.info("Usar Cookies: %s" % cookies) logger.info("Descargar Pagina: %s" % (not only_headers)) logger.info("Fichero de Cookies: " + ficherocookies) logger.info("Headers:") for header in request_headers: logger.info("- %s: %s" % (header, request_headers[header])) # Handlers if not follow_redirects: handlers.append(NoRedirectHandler()) if cookies: handlers.append(urllib2.HTTPCookieProcessor(cj)) opener = urllib2.build_opener(*handlers) if not alfa_s: logger.info("Realizando Peticion") # Contador inicio = time.time() req = urllib2.Request(url, post, request_headers) try: if urllib2.__version__ == "2.4": import socket deftimeout = socket.getdefaulttimeout() if timeout is not None: socket.setdefaulttimeout(timeout) handle = opener.open(req) socket.setdefaulttimeout(deftimeout) else: handle = opener.open(req, timeout=timeout) except urllib2.HTTPError, handle: response["sucess"] = False response["code"] = handle.code response["error"] 
= handle.__dict__.get("reason", str(handle)) response["headers"] = handle.headers.dict if not only_headers: response["data"] = handle.read() else: response["data"] = "" response["time"] = time.time() - inicio response["url"] = handle.geturl() except Exception, e: response["sucess"] = False response["code"] = e.__dict__.get("errno", e.__dict__.get("code", str(e))) response["error"] = e.__dict__.get("reason", str(e)) response["headers"] = {} response["data"] = "" response["time"] = time.time() - inicio response["url"] = url
""" RssReader combines an RSS parser, feed url management, and timed updates, with the option of adding observers to get notification of changes """ import threading import calendar import time try: from email.utils import parsedate # for parsing dates except ImportError: from email.Utils import parsedate # for parsing dates import socket Timeout = socket.getdefaulttimeout() import feedparser import urllib2 from AccessGrid.Preferences import Preferences from AccessGrid import Utilities from AccessGrid import Log # work around apparent bug in socket.setdefaulttimeout, # which is used in feedparser socket.setdefaulttimeout(Timeout) def strtimeToSecs(strtime): """ Convert a time string to seconds since the epoch
def __init__(self, timeout=None):
    self.old_timeout = socket.getdefaulttimeout()
    self.timeout = timeout
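# Only the constructor is shown above; it reads like the start of a
# context-manager class. A sketch of what the full class plausibly looks like
# (the class name and the __enter__/__exit__ bodies are assumptions, not the
# source's code):
import socket


class SocketTimeout(object):
    """Hypothetical wrapper: temporarily swap the default socket timeout."""

    def __init__(self, timeout=None):
        self.old_timeout = socket.getdefaulttimeout()
        self.timeout = timeout

    def __enter__(self):
        socket.setdefaulttimeout(self.timeout)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore whatever was configured before, even if the body raised.
        socket.setdefaulttimeout(self.old_timeout)
        return False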
def test_http_basic(self):
    self.assertIsNone(socket.getdefaulttimeout())
    url = "http://www.example.com"
    with test_support.transient_internet(url, timeout=None):
        u = _urlopen_with_retry(url)
        self.assertIsNone(u.fp._sock.fp._sock.gettimeout())
def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ Perform a request on a given urllib connection object taken from our pool. :param conn: a connection from one of our connection pools :param timeout: Socket timeout in seconds for the request. This can be a float or integer, which will set the same timeout value for the socket connect and the socket read, or an instance of :class:`urllib3.util.Timeout`, which gives you more fine-grained control over your timeouts. """ self.num_requests += 1 timeout_obj = self._get_timeout(timeout) timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. try: self._validate_conn(conn) except (SocketTimeout, BaseSSLError) as e: # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) raise # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching # the exception and assuming all BadStatusLine exceptions are read # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() except (SocketTimeout, BaseSSLError, SocketError) as e: self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, httplib_response.status, httplib_response.length)) return httplib_response
def test_ftp_basic(self):
    self.assertTrue(socket.getdefaulttimeout() is None)
    u = _urlopen_with_retry(self.FTP_HOST)
    self.assertTrue(u.fp.fp._sock.gettimeout() is None)
def test_http_basic(self):
    self.assertTrue(socket.getdefaulttimeout() is None)
    u = _urlopen_with_retry("http://www.python.org")
    self.assertTrue(u.fp._sock.fp._sock.gettimeout() is None)
def Send(self, request_path, payload="", content_type="application/octet-stream", timeout=None, **kwargs): """Sends an RPC and returns the response. Args: request_path: The path to send the request to, eg /api/appversion/create. payload: The body of the request, or None to send an empty request. content_type: The Content-Type header to use. timeout: timeout in seconds; default None i.e. no timeout. (Note: for large requests on OS X, the timeout doesn't work right.) kwargs: Any keyword arguments are converted into query string parameters. Returns: The response body, as a string. """ # We get the auth domain to know if we're uploading to Google or to # Appscale. auth_domain = '' if 'AUTH_DOMAIN' in os.environ: auth_domain = os.environ['AUTH_DOMAIN'].lower() old_timeout = socket.getdefaulttimeout() socket.setdefaulttimeout(timeout) try: tries = 0 while True: tries += 1 if auth_domain == "appscale": self._LoadAppScaleCookie() url = "%s://%s%s" % (self.scheme, self.host, request_path) if kwargs: url += "?" + urllib.urlencode(sorted(kwargs.items())) req = self._CreateRequest(url=url, data=payload) req.add_header("Content-Type", content_type) req.add_header("X-appcfg-api-version", "1") try: logger.debug( 'Sending %s request:\n%s', self.scheme.upper(), HttpRequestToString(req, include_data=self.debug_data)) f = self.opener.open(req) response = f.read() f.close() return response except urllib2.HTTPError, e: logger.debug("Got http error, this is try #%s", tries) if tries > self.rpc_tries: raise AppScaleAuthenticationError("Unable to authenticate " + \ "with AppScale.") # App Load Balancer returns HTTP 502 if invalid cookie # is used for authentication, though ideally it should throw # HTTP 401 only. But handling HTTP 502 here until the App # Load Balancer code is fixed. if e.code == 401 or e.code == 502: if auth_domain == 'appscale': self._AppScaleAuthenticate() else: self._Authenticate() elif e.code >= 500 and e.code < 600: continue elif e.code == 302: if tries >= 2: if auth_domain == 'appscale': logger.info("Deleting authentication cookie : %s" % \ self.cookie_jar.filename) if os.path.isfile(self.cookie_jar.filename): os.remove(self.cookie_jar.filename) raise AppScaleAuthenticationError("Could not " + \ "authenticate with AppScale. Wrong username/password.") else: raise loc = e.info()["location"] logger.debug("Got 302 redirect. Location: %s", loc) if loc.startswith( "https://www.google.com/accounts/ServiceLogin" ): self._Authenticate() elif re.match( r"https://www.google.com/a/[a-z0-9.-]+/ServiceLogin", loc): self.account_type = os.getenv( "APPENGINE_RPC_HOSTED_LOGIN_TYPE", "HOSTED") self._Authenticate() elif auth_domain == 'appscale': self._AppScaleAuthenticate() elif loc.startswith("http://%s/_ah/login" % (self.host, )): self._DevAppServerAuthenticate() else: raise elif e.code == 403: if auth_domain == 'appscale': logger.info("Deleting authentication cookie : %s" % \ self.cookie_jar.filename) os.remove(self.cookie_jar.filename) raise AppScaleAuthenticationError("Could not authenticate " + \ "with AppScale.") else: raise else: raise finally: socket.setdefaulttimeout(old_timeout)
def setUpClass(cls):
    cls.original_socket_default_timeout = socket.getdefaulttimeout()
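# setUpClass above only records the starting value; a matching tearDownClass
# presumably restores it so later test cases see an unchanged module-wide
# default. A minimal sketch (class name and tearDownClass body are assumptions):
import socket
import unittest


class TimeoutStateTestCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.original_socket_default_timeout = socket.getdefaulttimeout()

    @classmethod
    def tearDownClass(cls):
        # Undo any timeout changes made by the tests in this class.
        socket.setdefaulttimeout(cls.original_socket_default_timeout)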
def socket_getdefaulttimeout():
    return socket.getdefaulttimeout()
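# A wrapper this thin usually exists so callers and tests have a single seam
# to stub instead of patching the socket module globally. A sketch of that
# pattern with a pytest-style test; describe_timeout and the test are
# illustrative assumptions, not from the source.
import socket
import sys


def socket_getdefaulttimeout():
    return socket.getdefaulttimeout()


def describe_timeout():
    # Consumers go through the wrapper, never through socket directly.
    timeout = socket_getdefaulttimeout()
    return "no default timeout" if timeout is None else "%.1fs default" % timeout


def test_describe_timeout(monkeypatch):
    # Stub the seam; global socket state stays untouched.
    monkeypatch.setattr(sys.modules[__name__], "socket_getdefaulttimeout",
                        lambda: 2.5)
    assert describe_timeout() == "2.5s default"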
def _make_request(self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw): """ Perform a base_ on a given urllib connection object taken from our pool. :param conn: a connection from one of our connection pools :param timeout: Socket timeout in seconds for the base_. This can be a float or integer, which will set the same timeout value for the socket connect and the socket read, or an instance of :class:`urllib3.util.Timeout`, which gives you more fine-grained control over your timeouts. """ self.num_requests += 1 timeout_obj = self._get_timeout(timeout) timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. try: self._validate_conn(conn) except (SocketTimeout, BaseSSLError) as e: # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) raise # conn.base_() calls httplib.*.base_, not the method in # urllib3.base_. It also calls makefile (recv) on the socket. if chunked: conn.request_chunked(method, url, **httplib_request_kw) else: conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by M_http.client, which # instead raises a BadStatusLine exception. Instead of catching # the exception and assuming all BadStatusLine exceptions are read # timeouts, check for a zero timeout before making the base_. if read_timeout == 0: raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 3 try: httplib_response = conn.getresponse() except Exception as e: # Remove the TypeError from the exception chain in Python 3; # otherwise it looks like a programming error was the cause. six.raise_from(e, None) except (SocketTimeout, BaseSSLError, SocketError) as e: self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host, self.port, method, url, http_version, httplib_response.status, httplib_response.length) try: assert_header_parsing(httplib_response.msg) except (HeaderParsingError, TypeError) as hpe: # Platform-specific: Python 3 log.warning('Failed to parse headers (url=%s): %s', self._absolute_url(url), hpe, exc_info=True) return httplib_response
def test_ftp_basic(self):
    self.assertIsNone(socket.getdefaulttimeout())
    with test_support.transient_internet(self.FTP_HOST, timeout=None):
        u = _urlopen_with_retry(self.FTP_HOST)
        self.assertIsNone(u.fp.fp._sock.gettimeout())
def default_socket_timeout(timeout):
    """Context manager temporarily setting the default socket timeout."""
    prev = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        yield
    finally:
        socket.setdefaulttimeout(prev)
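# Like the generator-based manager earlier, this only becomes a context
# manager once decorated with contextlib.contextmanager, which is presumably
# applied just above the excerpt. One place such a guard is genuinely handy is
# around libraries that expose no timeout argument of their own; a hedged
# usage sketch (the xmlrpc example is an assumption for illustration):
import contextlib
import socket
import xmlrpc.client


@contextlib.contextmanager
def default_socket_timeout(timeout):
    """Context manager temporarily setting the default socket timeout."""
    prev = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        yield
    finally:
        socket.setdefaulttimeout(prev)


# ServerProxy has no timeout parameter of its own; sockets created while the
# block is active inherit the 10-second default instead.
with default_socket_timeout(10):
    proxy = xmlrpc.client.ServerProxy("http://example.com/RPC2")
    # proxy.some_method()  # network calls made here pick up the temporary timeout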
def _RetrieveURL(url, payload, method, headers, request, response, follow_redirects=True, deadline=_API_CALL_DEADLINE, validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT): """Retrieves a URL over network. Args: url: String containing the URL to access. payload: Request payload to send, if any; None if no payload. If the payload is unicode, we assume it is utf-8. method: HTTP method to use (e.g., 'GET') headers: List of additional header objects to use for the request. request: A urlfetch_service_pb.URLFetchRequest proto object from original request. response: A urlfetch_service_pb.URLFetchResponse proto object to populate with the response data. follow_redirects: optional setting (defaulting to True) for whether or not we should transparently follow redirects (up to MAX_REDIRECTS) deadline: Number of seconds to wait for the urlfetch to finish. validate_certificate: If true, do not send request to server unless the certificate is valid, signed by a trusted CA and the hostname matches the certificate. Raises: Raises an apiproxy_errors.ApplicationError exception with INVALID_URL_ERROR in cases where: - The protocol of the redirected URL is bad or missing. - The port is not in the allowable range of ports. Raises an apiproxy_errors.ApplicationError exception with TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded """ last_protocol = '' last_host = '' if isinstance(payload, unicode): payload = payload.encode('utf-8') for redirect_number in xrange(MAX_REDIRECTS + 1): parsed = urlparse.urlsplit(url) protocol, host, path, query, fragment = parsed port = urllib.splitport(urllib.splituser(host)[1])[1] if not _IsAllowedPort(port): logging.error( 'urlfetch received %s ; port %s is not allowed in production!' % (url, port)) raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.INVALID_URL) if protocol and not host: logging.error('Missing host on redirect; target url is %s' % url) raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.INVALID_URL) if not host and not protocol: host = last_host protocol = last_protocol adjusted_headers = { 'User-Agent': [('AppEngine-Google; (+http://code.google.com/appengine; appid: %s)' % os.getenv('APPLICATION_ID'))], 'Host': [host], 'Accept-Encoding': ['gzip'], } if payload is not None: adjusted_headers['Content-Length'] = [str(len(payload))] if method == 'POST' and payload: adjusted_headers['Content-Type'] = [ 'application/x-www-form-urlencoded'] passthrough_content_encoding = False for header in headers: header_key = header.key() if header_key.lower() == 'user-agent': adjusted_headers[header_key.title()] = [( '%s %s' % (header.value(), adjusted_headers['User-Agent'][0]))] elif header_key.lower() == 'accept-encoding': passthrough_content_encoding = True adjusted_headers[header_key.title()] = [header.value()] elif header_key.lower() == 'content-type': adjusted_headers[header_key.title()] = [header.value()] else: adjusted_headers.setdefault(header_key, []).append(header.value()) if payload is not None: escaped_payload = payload.encode('string_escape') else: escaped_payload = '' logging.debug('Making HTTP request: host = %r, ' 'url = %r, payload = %.1000r, headers = %r', host, url, escaped_payload, adjusted_headers) try: proxy_host = None if protocol == 'http': connection_class = httplib.HTTPConnection default_port = 80 if os.environ.get('HTTP_PROXY') and not _IsLocalhost(host): _, proxy_host, _, _, _ = ( urlparse.urlsplit(os.environ.get('HTTP_PROXY'))) elif protocol == 'https': if 
(validate_certificate and _CanValidateCerts() and CERT_PATH): connection_class = fancy_urllib.create_fancy_connection( ca_certs=CERT_PATH) else: connection_class = httplib.HTTPSConnection default_port = 443 if (_CONNECTION_SUPPORTS_SSL_TUNNEL and os.environ.get('HTTPS_PROXY') and not _IsLocalhost(host)): _, proxy_host, _, _, _ = ( urlparse.urlsplit(os.environ.get('HTTPS_PROXY'))) else: error_msg = 'Redirect specified invalid protocol: "%s"' % protocol logging.error(error_msg) raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg) connection_kwargs = ( {'timeout': deadline} if _CONNECTION_SUPPORTS_TIMEOUT else {}) if proxy_host: proxy_address, _, proxy_port = proxy_host.partition(':') connection = connection_class( proxy_address, proxy_port if proxy_port else default_port, **connection_kwargs) full_path = urlparse.urlunsplit((protocol, host, path, query, '')) if protocol == 'https': connection.set_tunnel(host) else: connection = connection_class(host, **connection_kwargs) full_path = urlparse.urlunsplit(('', '', path, query, '')) last_protocol = protocol last_host = host if not _CONNECTION_SUPPORTS_TIMEOUT: orig_timeout = socket.getdefaulttimeout() try: if not _CONNECTION_SUPPORTS_TIMEOUT: socket.setdefaulttimeout(deadline) _SendRequest(connection, method, full_path, payload, adjusted_headers) http_response = connection.getresponse() if method == 'HEAD': http_response_data = '' else: http_response_data = http_response.read() finally: if not _CONNECTION_SUPPORTS_TIMEOUT: socket.setdefaulttimeout(orig_timeout) connection.close() except _fancy_urllib_InvalidCertException, e: raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR, str(e)) except _fancy_urllib_SSLError, e: app_error = ( urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED if 'timed out' in e.message else urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR) raise apiproxy_errors.ApplicationError(app_error, str(e))
def _f(*args, **kwargs):
    orig_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(args[0].__timeout)
    result = f(*args, **kwargs)
    socket.setdefaulttimeout(orig_timeout)
    return result
def test_ftp_basic(self):
    self.assertIsNone(socket.getdefaulttimeout())
    with socket_helper.transient_internet(self.FTP_HOST, timeout=None):
        u = _urlopen_with_retry(self.FTP_HOST)
        self.addCleanup(u.close)
        self.assertIsNone(u.fp.fp.raw._sock.gettimeout())
def transient_internet(resource_name, timeout=30.0, errnos=()): """Return a context manager that raises ResourceDenied when various issues with the Internet connection manifest themselves as exceptions.""" default_errnos = [ ('ECONNREFUSED', 111), ('ECONNRESET', 104), ('EHOSTUNREACH', 113), ('ENETUNREACH', 101), ('ETIMEDOUT', 110), ] default_gai_errnos = [ ('EAI_AGAIN', -3), ('EAI_FAIL', -4), ('EAI_NONAME', -2), ('EAI_NODATA', -5), # Windows defines EAI_NODATA as 11001 but idiotic getaddrinfo() # implementation actually returns WSANO_DATA i.e. 11004. ('WSANO_DATA', 11004), ] denied = ResourceDenied("Resource '%s' is not available" % resource_name) captured_errnos = errnos gai_errnos = [] if not captured_errnos: captured_errnos = [ getattr(errno, name, num) for (name, num) in default_errnos ] gai_errnos = [ getattr(socket, name, num) for (name, num) in default_gai_errnos ] def filter_error(err): n = getattr(err, 'errno', None) if (isinstance(err, socket.timeout) or (isinstance(err, socket.gaierror) and n in gai_errnos) or n in captured_errnos): if not verbose: sys.stderr.write(denied.args[0] + "\n") raise denied old_timeout = socket.getdefaulttimeout() try: if timeout is not None: socket.setdefaulttimeout(timeout) yield except IOError as err: # urllib can wrap original socket errors multiple times (!), we must # unwrap to get at the original error. while True: a = err.args if len(a) >= 1 and isinstance(a[0], IOError): err = a[0] # The error can also be wrapped as args[1]: # except socket.error as msg: # raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) elif len(a) >= 2 and isinstance(a[1], IOError): err = a[1] else: break filter_error(err) raise # XXX should we catch generic exceptions and look for their # __cause__ or __context__? finally: socket.setdefaulttimeout(old_timeout)
def __call__(self, *args, **kw): if self._name == "_": if self.__name in ["__repr__", "__str__"]: return self.__repr__() else: chain = [] parent = self._parent while parent._parent: chain = [parent._name] + chain parent = parent._parent url = parent._icontrol_url % parent.__dict__ ns = parent._icontrol_ns + ':' + '/'.join(chain) if parent._url_params: url = "%s?%s" % ( url, urllib.parse.urlencode(parent._url_params)) parent._cache.clear() p = parent if p._cache.get(ns) is not None: ic = p._cache[ns] else: if parent._session: headers = SOAPpy.Types.headerType() sess_t = SOAPpy.Types.integerType(parent._session) sess_t._setMustUnderstand(0) sess_t._setAttr('xmlns:myns1', parent._icontrol_ns) headers._addItem('myns1:session', sess_t) ic = SOAPpy.SOAPProxy(url, ns, header=headers, timeout=p.timeout) else: ic = SOAPpy.SOAPProxy(url, ns, timeout=p.timeout) p._cache[ns] = ic #ic.config.debug = p._debug ic.simplify_objects = 1 try: # An ugly way of setting the timeout per socket, but it # seems that SOAPpy is ignoring the timeout parameter set in # the SOAPProxy constructor. before = socket.getdefaulttimeout() socket.setdefaulttimeout(p.timeout) if p._debug: LOG.debug( "%s -> %s.%s(%s)", url, '.'.join(chain), self._name, ', '.join([ '%s=%s' % (x, y) for x, y in list(kw.items()) ])) ret = getattr(ic, self._name)(*args, **kw) if p._debug: LOG.debug(ret) return ret except SOAPpy.Types.faultType as e: if 'Unknown method' in e.faultstring: raise UnknownMethod(e) raise IControlFault(e) except SOAPpy.Errors.HTTPError as e: if 401 == e.code: raise AuthFailed(e) raise IControlTransportError(e) finally: socket.setdefaulttimeout(before)
def ipkgCallback(self, event, param):
    if event == IpkgComponent.EVENT_DOWNLOAD:
        self.status.setText(_("Downloading"))
    elif event == IpkgComponent.EVENT_UPGRADE:
        if self.sliderPackages.has_key(param):
            self.slider.setValue(self.sliderPackages[param])
        self.package.setText(param)
        self.status.setText(_("Upgrading") + ": %s/%s" % (self.packages, self.total_packages))
        if param not in self.processed_packages:
            self.processed_packages.append(param)
            self.packages += 1
    elif event == IpkgComponent.EVENT_INSTALL:
        self.package.setText(param)
        self.status.setText(_("Installing"))
        if param not in self.processed_packages:
            self.processed_packages.append(param)
            self.packages += 1
    elif event == IpkgComponent.EVENT_REMOVE:
        self.package.setText(param)
        self.status.setText(_("Removing"))
        if param not in self.processed_packages:
            self.processed_packages.append(param)
            self.packages += 1
    elif event == IpkgComponent.EVENT_CONFIGURING:
        self.package.setText(param)
        self.status.setText(_("Configuring"))
    elif event == IpkgComponent.EVENT_MODIFIED:
        if config.plugins.softwaremanager.overwriteConfigFiles.getValue() in ("N", "Y"):
            self.ipkg.write(config.plugins.softwaremanager.overwriteConfigFiles.getValue())
        else:
            self.session.openWithCallback(
                self.modificationCallback, MessageBox,
                _("A configuration file (%s) has been modified since it was installed.\nDo you want to keep your modifications?") % param)
    elif event == IpkgComponent.EVENT_ERROR:
        self.error += 1
    elif event == IpkgComponent.EVENT_DONE:
        if self.updating:
            self.updating = False
            self.ipkg.startCmd(IpkgComponent.CMD_UPGRADE_LIST)
        elif self.ipkg.currentCommand == IpkgComponent.CMD_UPGRADE_LIST:
            from urllib import urlopen
            import socket
            currentTimeoutDefault = socket.getdefaulttimeout()
            socket.setdefaulttimeout(3)
            try:
                config.softwareupdate.updateisunstable.setValue(
                    urlopen("http://enigma2.world-of-satellite.com/feeds/" + getImageVersion() + "/status").read())
            except:
                config.softwareupdate.updateisunstable.setValue('1')
            socket.setdefaulttimeout(currentTimeoutDefault)
            self.total_packages = None
            if config.softwareupdate.updateisunstable.getValue() == '1' and config.softwareupdate.updatebeta.getValue():
                self.total_packages = len(self.ipkg.getFetchedList())
                message = _("The current update may be unstable") + "\n" + \
                    _("Are you sure you want to update your %s %s ?") % (getMachineBrand(), getMachineName()) + \
                    "\n(" + (ngettext("%s updated package available", "%s updated packages available", self.total_packages) % self.total_packages) + ")"
            elif config.softwareupdate.updateisunstable.getValue() == '0':
                self.total_packages = len(self.ipkg.getFetchedList())
                message = _("Do you want to update your %s %s ?") % (getMachineBrand(), getMachineName()) + \
                    "\n(" + (ngettext("%s updated package available", "%s updated packages available", self.total_packages) % self.total_packages) + ")"
            if self.total_packages:
                global ocram
                for package_tmp in self.ipkg.getFetchedList():
                    if package_tmp[0].startswith('enigma2-plugin-picons-tv-ocram'):
                        ocram = ocram + '[ocram-picons] ' + package_tmp[0].split('enigma2-plugin-picons-tv-ocram.')[1] + ' updated ' + package_tmp[2] + '\n'
                    elif package_tmp[0].startswith('enigma2-plugin-settings-ocram'):
                        ocram = ocram + '[ocram-settings] ' + package_tmp[0].split('enigma2-plugin-settings-ocram.')[1] + ' updated ' + package_tmp[2] + '\n'
                config.softwareupdate.updatefound.setValue(True)
                choices = [(_("View the changes"), "changes"),
                           (_("Upgrade and reboot system"), "cold")]
                if path.exists("/usr/lib/enigma2/python/Plugins/SystemPlugins/ViX/BackupManager.pyo"):
                    if not config.softwareupdate.autosettingsbackup.getValue() and config.backupmanager.backuplocation.getValue():
                        choices.append((_("Perform a settings backup,") + '\n\t' + _("making a backup before updating") + '\n\t' + _("is strongly advised."), "backup"))
                    if not config.softwareupdate.autoimagebackup.getValue() and config.imagemanager.backuplocation.getValue():
                        choices.append((_("Perform a full image backup"), "imagebackup"))
                choices.append((_("Update channel list only"), "channels"))
                choices.append((_("Cancel"), ""))
                upgrademessage = self.session.openWithCallback(
                    self.startActualUpgrade, ChoiceBox,
                    title=message, list=choices,
                    skin_name="SoftwareUpdateChoices")
                upgrademessage.setTitle(_('Software update'))
            else:
                upgrademessage = self.session.openWithCallback(
                    self.close, MessageBox,
                    _("Nothing to upgrade"),
                    type=MessageBox.TYPE_INFO,
                    timeout=10,
                    close_on_any_key=True)
                upgrademessage.setTitle(_('Software update'))
        elif self.channellist_only > 0:
            if self.channellist_only == 1:
                self.setEndMessage(_("Could not find installed channel list."))
            elif self.channellist_only == 2:
                self.slider.setValue(2)
                self.ipkg.startCmd(IpkgComponent.CMD_REMOVE, {'package': self.channellist_name})
                self.channellist_only += 1
            elif self.channellist_only == 3:
                self.slider.setValue(3)
                self.ipkg.startCmd(IpkgComponent.CMD_INSTALL, {'package': self.channellist_name})
                self.channellist_only += 1
            elif self.channellist_only == 4:
                self.showUpdateCompletedMessage()
                eDVBDB.getInstance().reloadBouquets()
                eDVBDB.getInstance().reloadServicelist()
        elif self.error == 0:
            self.showUpdateCompletedMessage()
        else:
            self.activityTimer.stop()
            self.activityslider.setValue(0)
            error = _("Your %s %s might be unusable now. Please consult the manual for further assistance before rebooting your %s %s.") % (getMachineBrand(), getMachineName(), getMachineBrand(), getMachineName())
            if self.packages == 0:
                error = _("No updates available. Please try again later.")
            if self.updating:
                error = _("Update failed. Your %s %s does not have a working internet connection.") % (getMachineBrand(), getMachineName())
            self.status.setText(_("Error") + " - " + error)
    elif event == IpkgComponent.EVENT_LISTITEM:
        if 'enigma2-plugin-settings-' in param[0] and self.channellist_only > 0:
            self.channellist_name = param[0]
            self.channellist_only = 2
    #print event, "-", param
    pass
class RecursiveFetcher(object): LINK_FILTER = tuple( re.compile(i, re.IGNORECASE) for i in ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$')) # ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in # ( # # ) # ) CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE) default_timeout = socket.getdefaulttimeout( ) # Needed here as it is used in __del__ def __init__(self, options, log, image_map={}, css_map={}, job_info=None): bd = options.dir if not isinstance(bd, unicode): bd = bd.decode(filesystem_encoding) self.base_dir = os.path.abspath(os.path.expanduser(bd)) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) self.log = log self.verbose = options.verbose self.timeout = options.timeout self.encoding = options.encoding self.browser = options.browser if hasattr(options, 'browser') else browser() self.max_recursions = options.max_recursions self.match_regexps = [ re.compile(i, re.IGNORECASE) for i in options.match_regexps ] self.filter_regexps = [ re.compile(i, re.IGNORECASE) for i in options.filter_regexps ] self.max_files = options.max_files self.delay = options.delay self.last_fetch_at = 0. self.filemap = {} self.imagemap = image_map self.imagemap_lock = threading.RLock() self.stylemap = css_map self.image_url_processor = None self.stylemap_lock = threading.RLock() self.downloaded_paths = [] self.current_dir = self.base_dir self.files = 0 self.preprocess_regexps = getattr(options, 'preprocess_regexps', []) self.remove_tags = getattr(options, 'remove_tags', []) self.remove_tags_after = getattr(options, 'remove_tags_after', None) self.remove_tags_before = getattr(options, 'remove_tags_before', None) self.keep_only_tags = getattr(options, 'keep_only_tags', []) self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup) self.preprocess_raw_html = getattr(options, 'preprocess_raw_html', lambda raw, url: raw) self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages', lambda soup: None) self.postprocess_html_ext = getattr(options, 'postprocess_html', None) self.preprocess_image_ext = getattr(options, 'preprocess_image', None) self._is_link_wanted = getattr(options, 'is_link_wanted', default_is_link_wanted) self.compress_news_images_max_size = getattr( options, 'compress_news_images_max_size', None) self.compress_news_images = getattr(options, 'compress_news_images', False) self.compress_news_images_auto_size = getattr( options, 'compress_news_images_auto_size', 16) self.scale_news_images = getattr(options, 'scale_news_images', None) self.download_stylesheets = not options.no_stylesheets self.show_progress = True self.failed_links = [] self.job_info = job_info def get_soup(self, src, url=None): nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage.extend(self.preprocess_regexps) # Some websites have buggy doctype declarations that mess up beautifulsoup nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL | re.IGNORECASE), lambda m: '')] # Remove comments as they can leave detritus when extracting tags leaves # multiple nested comments nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')) usrc = xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0] usrc = self.preprocess_raw_html(usrc, url) soup = BeautifulSoup(usrc, markupMassage=nmassage) replace = self.prepreprocess_html_ext(soup) if replace is not None: soup = BeautifulSoup(xml_to_unicode(replace, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage) if self.keep_only_tags: body = Tag(soup, 'body') try: if 
isinstance(self.keep_only_tags, dict): self.keep_only_tags = [self.keep_only_tags] for spec in self.keep_only_tags: for tag in soup.find('body').findAll(**spec): body.insert(len(body.contents), tag) soup.find('body').replaceWith(body) except AttributeError: # soup has no body element pass def remove_beyond(tag, next): while tag is not None and getattr(tag, 'name', None) != 'body': after = getattr(tag, next) while after is not None: ns = getattr(tag, next) after.extract() after = ns tag = tag.parent if self.remove_tags_after is not None: rt = [self.remove_tags_after] if isinstance( self.remove_tags_after, dict) else self.remove_tags_after for spec in rt: tag = soup.find(**spec) remove_beyond(tag, 'nextSibling') if self.remove_tags_before is not None: rt = [self.remove_tags_before] if isinstance( self.remove_tags_before, dict) else self.remove_tags_before for spec in rt: tag = soup.find(**spec) remove_beyond(tag, 'previousSibling') for kwds in self.remove_tags: for tag in soup.findAll(**kwds): tag.extract() return self.preprocess_html_ext(soup) def fetch_url(self, url): data = None self.log.debug('Fetching', url) st = time.time() # Check for a URL pointing to the local filesystem and special case it # for efficiency and robustness. Bypasses delay checking as it does not # apply to local fetches. Ensures that unicode paths that are not # representable in the filesystem_encoding work. is_local = 0 if url.startswith('file://'): is_local = 7 elif url.startswith('file:'): is_local = 5 if is_local > 0: url = url[is_local:] if iswindows and url.startswith('/'): url = url[1:] with open(url, 'rb') as f: data = response(f.read()) data.newurl = 'file:' + url # This is what mechanize does for # local URLs self.log.debug('Fetched %s in %.1f seconds' % (url, time.time() - st)) return data delta = time.time() - self.last_fetch_at if delta < self.delay: time.sleep(self.delay - delta) if isinstance(url, unicode): url = url.encode('utf-8') # Not sure is this is really needed as I think mechanize # handles quoting automatically, but leaving it # in case it breaks something if re.search(r'\s+', url) is not None: purl = list(urlparse.urlparse(url)) for i in range(2, 6): purl[i] = quote(purl[i]) url = urlparse.urlunparse(purl) open_func = getattr(self.browser, 'open_novisit', self.browser.open) try: with closing(open_func(url, timeout=self.timeout)) as f: data = response(f.read() + f.read()) data.newurl = f.geturl() except urllib2.URLError as err: if hasattr(err, 'code') and err.code in responses: raise FetchError(responses[err.code]) if getattr(err, 'reason', [0])[0] == 104 or \ getattr(getattr(err, 'args', [None])[0], 'errno', None) in (-2, -3): # Connection reset by peer or Name or service not known self.log.debug('Temporary error, retrying in 1 second') time.sleep(1) with closing(open_func(url, timeout=self.timeout)) as f: data = response(f.read() + f.read()) data.newurl = f.geturl() else: raise err finally: self.last_fetch_at = time.time() self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st)) return data def start_fetch(self, url): soup = BeautifulSoup(u'<a href="' + url + '" />') res = self.process_links(soup, url, 0, into_dir='') self.log.debug(url, 'saved to', res) return res def is_link_ok(self, url): for i in self.__class__.LINK_FILTER: if i.search(url): return False return True def is_link_wanted(self, url, tag): try: return self._is_link_wanted(url, tag) except NotImplementedError: pass except: return False if self.filter_regexps: for f in self.filter_regexps: if f.search(url): return 
False if self.match_regexps: for m in self.match_regexps: if m.search(url): return True return False return True def process_stylesheets(self, soup, baseurl): diskpath = unicode_path(os.path.join(self.current_dir, 'stylesheets')) if not os.path.exists(diskpath): os.mkdir(diskpath) for c, tag in enumerate( soup.findAll(lambda tag: tag.name.lower() in ['link', 'style'] and tag.has_key('type') and tag['type'].lower( ) == 'text/css')): # noqa if tag.has_key('href'): # noqa iurl = tag['href'] if not urlparse.urlsplit(iurl).scheme: iurl = urlparse.urljoin(baseurl, iurl, False) with self.stylemap_lock: if self.stylemap.has_key(iurl): # noqa tag['href'] = self.stylemap[iurl] continue try: data = self.fetch_url(iurl) except Exception: self.log.exception('Could not fetch stylesheet ', iurl) continue stylepath = os.path.join(diskpath, 'style' + str(c) + '.css') with self.stylemap_lock: self.stylemap[iurl] = stylepath with open(stylepath, 'wb') as x: x.write(data) tag['href'] = stylepath else: for ns in tag.findAll(text=True): src = str(ns) m = self.__class__.CSS_IMPORT_PATTERN.search(src) if m: iurl = m.group(1) if not urlparse.urlsplit(iurl).scheme: iurl = urlparse.urljoin(baseurl, iurl, False) with self.stylemap_lock: if self.stylemap.has_key(iurl): # noqa ns.replaceWith( src.replace(m.group(1), self.stylemap[iurl])) continue try: data = self.fetch_url(iurl) except Exception: self.log.exception('Could not fetch stylesheet ', iurl) continue c += 1 stylepath = os.path.join(diskpath, 'style' + str(c) + '.css') with self.stylemap_lock: self.stylemap[iurl] = stylepath with open(stylepath, 'wb') as x: x.write(data) ns.replaceWith(src.replace(m.group(1), stylepath)) def rescale_image(self, data): return rescale_image(data, self.scale_news_images, self.compress_news_images_max_size, self.compress_news_images_auto_size) def process_images(self, soup, baseurl): diskpath = unicode_path(os.path.join(self.current_dir, 'images')) if not os.path.exists(diskpath): os.mkdir(diskpath) c = 0 for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag. 
has_key('src')): # noqa iurl = tag['src'] if iurl.startswith('data:image/'): try: data = b64decode(iurl.partition(',')[-1]) except: self.log.exception('Failed to decode embedded image') continue else: if callable(self.image_url_processor): iurl = self.image_url_processor(baseurl, iurl) if not urlparse.urlsplit(iurl).scheme: iurl = urlparse.urljoin(baseurl, iurl, False) with self.imagemap_lock: if self.imagemap.has_key(iurl): # noqa tag['src'] = self.imagemap[iurl] continue try: data = self.fetch_url(iurl) if data == 'GIF89a\x01': # Skip empty GIF files as PIL errors on them anyway continue except Exception: self.log.exception('Could not fetch image ', iurl) continue c += 1 fname = ascii_filename('img' + str(c)) if isinstance(fname, unicode): fname = fname.encode('ascii', 'replace') data = self.preprocess_image_ext( data, iurl) if self.preprocess_image_ext is not None else data if data is None: continue itype = what(None, data) if itype == 'svg' or (itype is None and b'<svg' in data[:1024]): # SVG image imgpath = os.path.join(diskpath, fname + '.svg') with self.imagemap_lock: self.imagemap[iurl] = imgpath with open(imgpath, 'wb') as x: x.write(data) tag['src'] = imgpath else: try: # Ensure image is valid img = image_from_data(data) if itype not in {'png', 'jpg', 'jpeg'}: itype = 'png' if itype == 'gif' else 'jpeg' data = image_to_data(img, fmt=itype) if self.compress_news_images and itype in {'jpg', 'jpeg'}: try: data = self.rescale_image(data) except Exception: self.log.exception('failed to compress image ' + iurl) # Moon+ apparently cannot handle .jpeg files if itype == 'jpeg': itype = 'jpg' imgpath = os.path.join(diskpath, fname + '.' + itype) with self.imagemap_lock: self.imagemap[iurl] = imgpath with open(imgpath, 'wb') as x: x.write(data) tag['src'] = imgpath except Exception: traceback.print_exc() continue def absurl(self, baseurl, tag, key, filter=True): iurl = tag[key] parts = urlparse.urlsplit(iurl) if not parts.netloc and not parts.path and not parts.query: return None if not parts.scheme: iurl = urlparse.urljoin(baseurl, iurl, False) if not self.is_link_ok(iurl): self.log.debug('Skipping invalid link:', iurl) return None if filter and not self.is_link_wanted(iurl, tag): self.log.debug('Filtered link: ' + iurl) return None return iurl def normurl(self, url): parts = list(urlparse.urlsplit(url)) parts[4] = '' return urlparse.urlunsplit(parts) def localize_link(self, tag, key, path): parts = urlparse.urlsplit(tag[key]) suffix = ('#' + parts.fragment) if parts.fragment else '' tag[key] = path + suffix def process_return_links(self, soup, baseurl): for tag in soup.findAll(lambda tag: tag.name.lower() == 'a' and tag. 
has_key('href')): # noqa iurl = self.absurl(baseurl, tag, 'href') if not iurl: continue nurl = self.normurl(iurl) if self.filemap.has_key(nurl): # noqa self.localize_link(tag, 'href', self.filemap[nurl]) def process_links(self, soup, baseurl, recursion_level, into_dir='links'): res = '' diskpath = os.path.join(self.current_dir, into_dir) if not os.path.exists(diskpath): os.mkdir(diskpath) prev_dir = self.current_dir try: self.current_dir = diskpath tags = list(soup.findAll('a', href=True)) for c, tag in enumerate(tags): if self.show_progress: print '.', sys.stdout.flush() sys.stdout.flush() iurl = self.absurl(baseurl, tag, 'href', filter=recursion_level != 0) if not iurl: continue nurl = self.normurl(iurl) if self.filemap.has_key(nurl): # noqa self.localize_link(tag, 'href', self.filemap[nurl]) continue if self.files > self.max_files: return res linkdir = 'link' + str(c) if into_dir else '' linkdiskpath = os.path.join(diskpath, linkdir) if not os.path.exists(linkdiskpath): os.mkdir(linkdiskpath) try: self.current_dir = linkdiskpath dsrc = self.fetch_url(iurl) newbaseurl = dsrc.newurl if len(dsrc) == 0 or \ len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0: raise ValueError('No content at URL %r' % iurl) if callable(self.encoding): dsrc = self.encoding(dsrc) elif self.encoding is not None: dsrc = dsrc.decode(self.encoding, 'replace') else: dsrc = xml_to_unicode(dsrc, self.verbose)[0] st = time.time() soup = self.get_soup(dsrc, url=iurl) self.log.debug('Parsed %s in %.1f seconds' % (iurl, time.time() - st)) base = soup.find('base', href=True) if base is not None: newbaseurl = base['href'] self.log.debug('Processing images...') self.process_images(soup, newbaseurl) if self.download_stylesheets: self.process_stylesheets(soup, newbaseurl) _fname = basename(iurl) if not isinstance(_fname, unicode): _fname.decode('latin1', 'replace') _fname = _fname.encode('ascii', 'replace').replace( '%', '').replace(os.sep, '') _fname = ascii_filename(_fname) _fname = os.path.splitext(_fname)[0][:120] + '.xhtml' res = os.path.join(linkdiskpath, _fname) self.downloaded_paths.append(res) self.filemap[nurl] = res if recursion_level < self.max_recursions: self.log.debug('Processing links...') self.process_links(soup, newbaseurl, recursion_level + 1) else: self.process_return_links(soup, newbaseurl) self.log.debug( 'Recursion limit reached. Skipping links in', iurl) if newbaseurl and not newbaseurl.startswith('/'): for atag in soup.findAll( 'a', href=lambda x: x and x.startswith('/')): atag['href'] = urlparse.urljoin( newbaseurl, atag['href'], True) if callable(self.postprocess_html_ext): soup = self.postprocess_html_ext( soup, c == 0 and recursion_level == 0 and not getattr(self, 'called_first', False), self.job_info) if c == 0 and recursion_level == 0: self.called_first = True save_soup(soup, res) self.localize_link(tag, 'href', res) except Exception as err: if isinstance(err, AbortArticle): raise self.failed_links.append((iurl, traceback.format_exc())) self.log.exception('Could not fetch link', iurl) finally: self.current_dir = diskpath self.files += 1 finally: self.current_dir = prev_dir if self.show_progress: print return res
    lines = html.split('\n')
    for i in [10, 50, 120]:
        charset = chardet.detect('\n'.join(lines[:i]))['encoding']
        if charset and charset.lower() != 'ascii':
            break
    if charset is None:
        charset = ''
    return charset.lower()


if __name__ == '__main__':
    import urllib2
    import sys
    import socket

    # If the default timeout is already managed centrally by the A3 layer above,
    # we should not set our own default timeout here.
    socket.getdefaulttimeout() or socket.setdefaulttimeout(5)

    try:
        url = sys.argv[1]
    except:
        print 'Usage: python charsetck.py http://www.knownsec.com/'
        sys.exit(0)

    req = urllib2.Request(url)
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    usock = urllib2.urlopen(req)
    headers = usock.headers.dict
    html = usock.read()
    usock.close()
    print check(headers, html)
def read_body_and_headers(url, post=None, headers=[], follow_redirects=False, timeout=None):
    _log("read_body_and_headers " + url)
    if post is not None:
        _log("read_body_and_headers post=" + post)
    if len(headers) == 0:
        headers.append(["User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"])

    # Start cookie lib
    ficherocookies = os.path.join(get_data_path(), 'cookies.dat')
    _log("read_body_and_headers cookies_file=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib not available")
        # If importing cookielib fails, let's try ClientCookie
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            _log("read_body_and_headers ClientCookie not available")
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            _log("read_body_and_headers ClientCookie available")
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        # importing cookielib worked
        _log("read_body_and_headers cookielib available")
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # This is a subclass of FileCookieJar that has useful load and save methods
        cj = cookielib.MozillaCookieJar()

    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")
        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # if we have a cookie file already saved, load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar installed in the opener, for fetching URLs
        if cookielib is not None:
            # if we use cookielib, we get the HTTPCookieProcessor and install the opener in urllib2
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled), urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled), urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie, we get the HTTPCookieProcessor and install the opener in ClientCookie
            _log("read_body_and_headers opener using ClientCookie")
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, send the request
    # -------------------------------------------------
    inicio = time.clock()  # Timer
    txheaders = {}  # Dictionary for the headers

    # Build the request
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")

    # Add the headers
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    _log("read_body_and_headers ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info():
                _log("%s" % line)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = handle.read()

    info = handle.info()
    _log("read_body_and_headers Response")

    returnheaders = []
    _log("read_body_and_headers ---------------------------")
    for header in info:
        _log("read_body_and_headers " + header + "=" + info[header])
        returnheaders.append([header, info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")

    '''
    # Send the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ", "%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0], header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    _log("read_body_and_headers Downloaded in %d seconds " % (fin - inicio + 1))

    _log("read_body_and_headers body=" + data)
    return data, returnheaders
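# Usage sketch (not from the source): how a caller might exercise the
# read_body_and_headers() helper above. The URL and the 10-second timeout are
# illustrative; on Python versions without urlopen(timeout=...), the timeout is
# applied through the setdefaulttimeout() fallback shown in the function.
data, returnheaders = read_body_and_headers("http://www.example.com/", timeout=10)
for name, value in returnheaders:
    print name + ": " + value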
def __init__(self, url=scalaris.DEFAULT_URL, timeout=socket.getdefaulttimeout()):
    # Note: the timeout default is captured once, when this method is defined.
    # Forward the caller-supplied url and timeout to the base connection.
    scalaris.JSONConnection.__init__(self, url=url, timeout=timeout)
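# A minimal sketch (assumption, not the scalaris API): because a default
# argument like timeout=socket.getdefaulttimeout() is evaluated only once, at
# definition time, a None sentinel re-reads the process-wide default on every
# call instead. The class and attribute names here are illustrative.
import socket

class ConnectionWithLateDefault(object):
    def __init__(self, url, timeout=None):
        # resolve the timeout at call time, not at definition time
        if timeout is None:
            timeout = socket.getdefaulttimeout()
        self.url = url
        self.timeout = timeout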
def _make_request(self, conn, method, url, timeout=_Default,
                  **httplib_request_kw):
    """
    Perform a request on a given urllib connection object taken from our
    pool.

    :param conn:
        a connection from one of our connection pools

    :param timeout:
        Socket timeout in seconds for the request. This can be a
        float or integer, which will set the same timeout value for
        the socket connect and the socket read, or an instance of
        :class:`urllib3.util.Timeout`, which gives you more fine-grained
        control over your timeouts.
    """
    self.num_requests += 1

    timeout_obj = self._get_timeout(timeout)
    timeout_obj.start_connect()
    conn.timeout = timeout_obj.connect_timeout

    # conn.request() calls httplib.*.request, not the method in
    # urllib3.request. It also calls makefile (recv) on the socket.
    conn.request(method, url, **httplib_request_kw)

    # Reset the timeout for the recv() on the socket
    read_timeout = timeout_obj.read_timeout

    # App Engine doesn't have a sock attr
    if hasattr(conn, 'sock'):
        # In Python 3 socket.py will catch EAGAIN and return None when you
        # try and read into the file pointer created by http.client, which
        # instead raises a BadStatusLine exception. Instead of catching
        # the exception and assuming all BadStatusLine exceptions are read
        # timeouts, check for a zero timeout before making the request.
        if read_timeout == 0:
            raise ReadTimeoutError(
                self, url,
                "Read timed out. (read timeout=%s)" % read_timeout)
        if read_timeout is Timeout.DEFAULT_TIMEOUT:
            conn.sock.settimeout(socket.getdefaulttimeout())
        else:  # None or a value
            conn.sock.settimeout(read_timeout)

    # Receive the response from the server
    try:
        try:  # Python 2.7+, use buffering of HTTP responses
            httplib_response = conn.getresponse(buffering=True)
        except TypeError:  # Python 2.6 and older
            httplib_response = conn.getresponse()
    except SocketTimeout:
        raise ReadTimeoutError(
            self, url, "Read timed out. (read timeout=%s)" % read_timeout)
    except BaseSSLError as e:
        # Catch possible read timeouts thrown as SSL errors. If not the
        # case, rethrow the original. We need to do this because of:
        # http://bugs.python.org/issue10272
        if 'timed out' in str(e) or \
           'did not complete (read)' in str(e):  # Python 2.6
            raise ReadTimeoutError(self, url, "Read timed out.")
        raise
    except SocketError as e:  # Platform-specific: Python 2
        # See the above comment about EAGAIN in Python 3. In Python 2 we
        # have to specifically catch it and throw the timeout error
        if e.errno in _blocking_errnos:
            raise ReadTimeoutError(
                self, url,
                "Read timed out. (read timeout=%s)" % read_timeout)
        raise

    # AppEngine doesn't have a version attr.
    http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
    log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
                                      httplib_response.status,
                                      httplib_response.length))
    return httplib_response
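# A minimal sketch of how the timeout parameter documented above is typically
# supplied by a caller (assumes urllib3's public HTTPConnectionPool/Timeout
# API; the host and values are illustrative, not taken from the source).
from urllib3 import HTTPConnectionPool
from urllib3.util import Timeout

pool = HTTPConnectionPool("example.com")
# a bare number applies to both the connect and the read phase
r1 = pool.request("GET", "/", timeout=3.0)
# a Timeout object sets the two phases independently
r2 = pool.request("GET", "/", timeout=Timeout(connect=2.0, read=7.0))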
def connect(self):
    old_timeout = socket.getdefaulttimeout()
    self.sock = socket.create_connection((self.host, self.port),
                                         timeout=CLIENT_CONNECT_TIMEOUT_S)
    # Once connected, fall back to the process-wide default timeout for I/O.
    self.sock.settimeout(old_timeout)
def _node_refresh_businfo(self, node, api, bad_node=False):
    """
    Retrieve bus info from the node and update nodes and edges as appropriate
    @param node: node name
    @type  node: str
    @param api: XML-RPC proxy
    @type  api: ServerProxy
    @param bad_node: If True, node has connectivity issues and should be
      treated differently
    @type  bad_node: bool
    """
    try:
        logger.debug("businfo: contacting node [%s] for bus info", node)

        # unmark bad node, though it stays on the bad list
        if bad_node:
            self._unmark_bad_node(node)

        # Lower the socket timeout as we cannot abide by slow HTTP timeouts.
        # If a node cannot meet this timeout, it goes on the bad list
        # TODO: override transport instead.
        old_timeout = socket.getdefaulttimeout()
        if bad_node:
            # even stricter timeout for bad_nodes right now
            socket.setdefaulttimeout(0.2)
        else:
            socket.setdefaulttimeout(1.0)

        code, msg, bus_info = api.getBusInfo(_ROS_NAME)

        socket.setdefaulttimeout(old_timeout)
    except Exception as e:
        # node is (still) bad
        self._mark_bad_node(node, str(e))
        code = -1
        msg = traceback.format_exc()

    updated = False
    if code != 1:
        logger.error("cannot get stats info from node [%s]: %s", node, msg)
    else:
        # [[connectionId1, destinationId1, direction1, transport1, ...]... ]
        for info in bus_info:
            # #3579 bad node, ignore
            if len(info) < 5:
                continue
            connection_id = info[0]
            dest_id = info[1]
            direction = info[2]
            transport = info[3]
            topic = info[4]
            if len(info) > 5:
                connected = info[5]
            else:
                connected = True  # backwards compatibility

            if connected and topic.startswith(self.topic_ns):
                # blindly add as we will be able to catch state change via edges.
                # this currently means we don't cleanup topics
                self.nt_nodes.add(topic_node(topic))

                # update node->topic->node graph edges
                updated = self.nt_edges.add_edges(node, topic_node(topic), direction) or updated

                # update node->node graph edges
                if dest_id.startswith('http://'):
                    # print("FOUND URI", dest_id)
                    dest_name = self.uri_node_map.get(dest_id, None)
                    updated = self.nn_edges.add_edges(node, dest_name, direction, topic) or updated
                else:
                    # TODO: anything to do here?
                    pass
    return updated
def _make_request( self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw ): """ Perform a request on a given urllib connection object taken from our pool. :param conn: a connection from one of our connection pools :param timeout: Socket timeout in seconds for the request. This can be a float or integer, which will set the same timeout value for the socket connect and the socket read, or an instance of :class:`urllib3.util.Timeout`, which gives you more fine-grained control over your timeouts. """ self.num_requests += 1 timeout_obj = self._get_timeout(timeout) timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. try: self._validate_conn(conn) except (SocketTimeout, BaseSSLError) as e: # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) raise # conn.request() calls http.client.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. try: if chunked: conn.request_chunked(method, url, **httplib_request_kw) else: conn.request(method, url, **httplib_request_kw) # We are swallowing BrokenPipeError (errno.EPIPE) since the server is # legitimately able to close the connection after sending a valid response. # With this behaviour, the received response is still readable. except BrokenPipeError: # Python 3 pass except IOError as e: # Python 2 and macOS/Linux # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ if e.errno not in { errno.EPIPE, errno.ESHUTDOWN, errno.EPROTOTYPE, }: raise # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr if getattr(conn, "sock", None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching # the exception and assuming all BadStatusLine exceptions are read # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout ) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 3 try: httplib_response = conn.getresponse() except BaseException as e: # Remove the TypeError from the exception chain in # Python 3 (including for exceptions like SystemExit). # Otherwise it looks like a bug in the code. six.raise_from(e, None) except (SocketTimeout, BaseSSLError, SocketError) as e: self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. http_version = getattr(conn, "_http_vsn_str", "HTTP/?") log.debug( '%s://%s:%s "%s %s %s" %s %s', self.scheme, self.host, self.port, method, url, http_version, httplib_response.status, httplib_response.length, ) try: assert_header_parsing(httplib_response.msg) except (HeaderParsingError, TypeError) as hpe: # Platform-specific: Python 3 log.warning( "Failed to parse headers (url=%s): %s", self._absolute_url(url), hpe, exc_info=True, ) return httplib_response
def Send(self, request_path, payload="",
         content_type="application/octet-stream",
         timeout=None,
         **kwargs):
    """Sends an RPC and returns the response.

    Args:
      request_path: The path to send the request to, eg /api/appversion/create.
      payload: The body of the request, or None to send an empty request.
      content_type: The Content-Type header to use.
      timeout: timeout in seconds; default None i.e. no timeout.
        (Note: for large requests on OS X, the timeout doesn't work right.)
      kwargs: Any keyword arguments are converted into query string parameters.

    Returns:
      The response body, as a string.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        tries = 0
        auth_tried = False
        while True:
            tries += 1
            url = "%s://%s%s" % (self.scheme, self.host, request_path)
            if kwargs:
                url += "?" + six.moves.urllib.parse.urlencode(sorted(kwargs.items()))
            req = self._CreateRequest(url=url, data=payload)
            req.add_header("Content-Type", content_type)
            req.add_header("X-appcfg-api-version", "1")
            try:
                logger.debug('Sending %s request:\n%s',
                             self.scheme.upper(),
                             HttpRequestToString(req, include_data=self.debug_data))
                f = self.opener.open(req)
                response = f.read()
                f.close()
                return response
            except six.moves.urllib.error.HTTPError as e:
                logger.debug("Got http error, this is try #%s", tries)
                if tries > self.rpc_tries:
                    raise
                elif e.code == 401:
                    if auth_tried:
                        raise
                    auth_tried = True
                    self._Authenticate()
                elif e.code >= 500 and e.code < 600:
                    continue
                elif e.code == 302:
                    if auth_tried:
                        raise
                    auth_tried = True
                    loc = e.info()["location"]
                    logger.debug("Got 302 redirect. Location: %s", loc)
                    if loc.startswith("https://www.google.com/accounts/ServiceLogin"):
                        self._Authenticate()
                    elif re.match(r"https://www\.google\.com/a/[a-z0-9\.\-]+/ServiceLogin", loc):
                        self.account_type = os.getenv("APPENGINE_RPC_HOSTED_LOGIN_TYPE", "HOSTED")
                        self._Authenticate()
                    elif loc.startswith("http://%s/_ah/login" % (self.host,)):
                        self._DevAppServerAuthenticate()
                    else:
                        raise
                else:
                    raise
    finally:
        socket.setdefaulttimeout(old_timeout)
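# A minimal sketch (not from the source): the save/restore idiom that Send()
# uses around its retry loop, factored into a reusable context manager. The
# name default_socket_timeout is illustrative.
import socket
from contextlib import contextmanager

@contextmanager
def default_socket_timeout(seconds):
    old = socket.getdefaulttimeout()
    socket.setdefaulttimeout(seconds)
    try:
        yield
    finally:
        # restore the previous process-wide default even if the body raises
        socket.setdefaulttimeout(old)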