Example #1
    def feedsStatus(self):
        from urllib import urlopen
        import socket

        self["tl_red"].hide()
        self["tl_yellow"].hide()
        self["tl_green"].hide()
        currentTimeoutDefault = socket.getdefaulttimeout()
        socket.setdefaulttimeout(3)
        try:
            d = urlopen("http://openvix.co.uk/TrafficLightState.php")
            self.trafficLight = d.read()
            if self.trafficLight == "unstable":
                self["tl_off"].hide()
                self["tl_red"].show()
            elif self.trafficLight == "updating":
                self["tl_off"].hide()
                self["tl_yellow"].show()
            elif self.trafficLight == "stable":
                self["tl_off"].hide()
                self["tl_green"].show()
            else:
                self.trafficLight = "unknown"
                self["tl_off"].show()
        except:
            self.trafficLight = "unknown"
            self["tl_off"].show()
        socket.setdefaulttimeout(currentTimeoutDefault)
Example #2
    def get_remote_applications_icon (self):
        logging.info ("get remote applications icon")
        for n in self.notification:
            if not str (n['app_id']) in self.app_ids:
                self.app_ids.append (str (n['app_id']))

        ids_str = ", ".join (self.app_ids)
        qstr = "SELECT icon_url, app_id FROM application WHERE app_id IN (%s)" % ids_str
        apps = self._query (qstr)

        default_timeout = socket.getdefaulttimeout ()
        socket.setdefaulttimeout (GET_ICON_TIMEOUT)
        logging.debug ("socket timeout: %s" % socket.getdefaulttimeout ())
        timeout_count = 0
        for app in apps:
            if timeout_count < 3:
                try:
                    icon_name = self.get_remote_icon \
                        (app['icon_url'], self.app_icons_dir)
                except TimeoutError:
                    logging.debug ("timeout")
                    timeout_count += 1
                    icon_name = ""
                except NoUpdateError:
                    logging.debug ("No need update")
                    icon_name = os.path.basename \
                            (urlparse.urlsplit (app['icon_url']).path)
            else:
                icon_name = ""
            self.applications[app['app_id']] = {'icon_name': icon_name}
        socket.setdefaulttimeout (default_timeout)
        self.refresh_status["apps_icon"] = True
Example #3
    def __init__(self, family_or_realsock=socket.AF_INET, *args, **kwargs):
        should_set_nonblocking = kwargs.pop('set_nonblocking', True)
        if isinstance(family_or_realsock, (int, long)):
            fd = _original_socket(family_or_realsock, *args, **kwargs)
        else:
            fd = family_or_realsock

        # import timeout from other socket, if it was there
        try:
            self._timeout = fd.gettimeout() or socket.getdefaulttimeout()
        except AttributeError:
            self._timeout = socket.getdefaulttimeout()

        if should_set_nonblocking:
            set_nonblocking(fd)
        self.fd = fd
        # when client calls setblocking(0) or settimeout(0) the socket must
        # act non-blocking
        self.act_non_blocking = False

        # Copy some attributes from underlying real socket.
        # This is the easiest way that i found to fix
        # https://bitbucket.org/eventlet/eventlet/issue/136
        # Only `getsockopt` is required to fix that issue, others
        # are just premature optimization to save __getattr__ call.
        self.bind = fd.bind
        self.close = fd.close
        self.fileno = fd.fileno
        self.getsockname = fd.getsockname
        self.getsockopt = fd.getsockopt
        self.listen = fd.listen
        self.setsockopt = fd.setsockopt
        self.shutdown = fd.shutdown
Example #4
def test_update_feeds(hacks_feed, mocked_parse):
    """update_feeds adds new entries, resets timeout."""
    assert socket.getdefaulttimeout() is None
    count = update_feeds()
    assert count == 2
    assert Entry.objects.count() == 2
    assert socket.getdefaulttimeout() is None
Example #5
    def testTimeoutAttribute(self):
        # This will prove that the timeout gets through HTTPConnection
        # and into the socket.

        # default -- use global socket timeout
        self.assertIsNone(socket.getdefaulttimeout())
        socket.setdefaulttimeout(30)
        try:
            httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT)
            httpConn.connect()
        finally:
            socket.setdefaulttimeout(None)
        self.assertEqual(httpConn.sock.gettimeout(), 30)
        httpConn.close()

        # no timeout -- do not use global socket default
        self.assertIsNone(socket.getdefaulttimeout())
        socket.setdefaulttimeout(30)
        try:
            httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT,
                                              timeout=None)
            httpConn.connect()
        finally:
            socket.setdefaulttimeout(None)
        self.assertEqual(httpConn.sock.gettimeout(), None)
        httpConn.close()

        # a value
        httpConn = client.HTTPConnection(HOST, TimeoutTest.PORT, timeout=30)
        httpConn.connect()
        self.assertEqual(httpConn.sock.gettimeout(), 30)
        httpConn.close()
Example #6
 def get_node(cls, external_manager, proxy_class, hostname=None, ssh_port=22, username='******', password=None, isRemote=False, use_keys=False):
     key = cls.get_key(proxy_class, hostname, ssh_port, username, password, isRemote, use_keys)
     if cls.nodes.get(key) is not None:
         return cls.nodes[key]
     try:
         import time
         import datetime
         start = datetime.datetime.now()
         print 'NODE_PROXY : START ',proxy_class,hostname
         print socket.getdefaulttimeout()
         print start
         node = NodeWrapper(external_manager,proxy_class,hostname,ssh_port,username,password,isRemote,use_keys)
     finally:
         now = datetime.datetime.now()    
         print 'NODE_PROXY : END ',hostname,socket.getdefaulttimeout()
         print (now - start).seconds
     cls._node_pool_lock.acquire()
     try:
         if cls.nodes.get(key) is None:
             cls.nodes[key] = node
             print 'Adding to NodePool'
         else:
             node.cleanup()
         return cls.nodes[key]
     finally:
         cls._node_pool_lock.release()
Example #7
    def testDefaultTimeout(self):
        # Testing default timeout
        # The default timeout should initially be None
        self.assertEqual(socket.getdefaulttimeout(), None)
        s = socket.socket()
        self.assertEqual(s.gettimeout(), None)
        s.close()

        # Set the default timeout to 10, and see if it propagates
        socket.setdefaulttimeout(10)
        self.assertEqual(socket.getdefaulttimeout(), 10)
        s = socket.socket()
        self.assertEqual(s.gettimeout(), 10)
        s.close()

        # Reset the default timeout to None, and see if it propagates
        socket.setdefaulttimeout(None)
        self.assertEqual(socket.getdefaulttimeout(), None)
        s = socket.socket()
        self.assertEqual(s.gettimeout(), None)
        s.close()

        # Check that setting it to an invalid value raises ValueError
        self.assertRaises(ValueError, socket.setdefaulttimeout, -1)

        # Check that setting it to an invalid type raises TypeError
        self.assertRaises(TypeError, socket.setdefaulttimeout, "spam")
Example #8
def test_update_feeds_resets_timeout_on_exception(mock_update, hacks_feed):
    """update_feeds resets the socket timeout even on an exception."""
    assert socket.getdefaulttimeout() is None
    mock_update.side_effect = Exception('Failure')
    with pytest.raises(Exception):
        update_feeds()
    assert socket.getdefaulttimeout() is None
Example #9
    def fetch(self, server):
        """
        This function gets your IP from a specific server
        """
        t = None
        socket_default_timeout = socket.getdefaulttimeout()
        opener = urllib.build_opener()
        opener.addheaders = [('User-agent',
                              "Mozilla/5.0 (X11; Linux x86_64; rv:24.0)"
                              " Gecko/20100101 Firefox/24.0")]

        try:
            # Close url resource if fetching not finished within timeout.
            t = Timer(self.timeout, self.handle_timeout, [self.url])
            t.start()

            # Open URL.
            if version_info[0:2] == (2, 5):
                # Support for Python 2.5.* using socket hack
                # (Changes global socket timeout.)
                socket.setdefaulttimeout(self.timeout)
                self.url = opener.open(server)
            else:
                self.url = opener.open(server, timeout=self.timeout)
    
            # Read response.
            content = self.url.read()

            # Didn't want to import chardet. Preferred to stick to the stdlib
            if PY3K:
                try:
                    content = content.decode('UTF-8')
                except UnicodeDecodeError:
                    content = content.decode('ISO-8859-1')

            p = '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.('
            p += '25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|['
            p += '01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
            m = re.search(
                p,
                content)
            myip = m.group(0)
            if len(myip) > 0:
                return myip
            else:
                return ''
        except Exception as e:
            print(e)
            return ''
        finally:
            if self.url is not None:
                self.url.close()
                self.url = None
            if t is not None:
                t.cancel()

            # Reset default socket timeout.
            if socket.getdefaulttimeout() != socket_default_timeout:
                socket.setdefaulttimeout(socket_default_timeout)
Example #10
 def _socket_create_connection(address, timeout=None):
     if timeout is None:
         timeout = socket.getdefaulttimeout()
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     oldtimeout = socket.getdefaulttimeout()
     socket.setdefaulttimeout(timeout)
     sock.connect(address)
     socket.setdefaulttimeout(oldtimeout)
     return sock
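In the snippet above the socket object is created before the default timeout is changed, so setdefaulttimeout(timeout) never affects sock (the default is only read when a socket is constructed), and an exception from connect leaves the changed default in place. A hedged alternative sketch that puts the timeout on the socket itself and leaves the global default alone (only the function name is taken from the source):

import socket

def _socket_create_connection(address, timeout=None):
    if timeout is None:
        timeout = socket.getdefaulttimeout()
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # per-socket timeout; the global default stays untouched
        sock.settimeout(timeout)
        sock.connect(address)
    except Exception:
        sock.close()
        raise
    return sock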
Example #11
 def test_timeout_reset_after_call(self):
     old_timeout = socket.getdefaulttimeout()
     self.stub_urlopen_with_timeout_check(30, None, "url")
     try:
         socket.setdefaulttimeout(1234)
         base_utils.urlopen("url", timeout=30)
         self.assertEquals(1234, socket.getdefaulttimeout())
     finally:
         socket.setdefaulttimeout(old_timeout)
Example #12
 def getTimeout(self):
     """
     Return the timeout set for this session. It should be set in any case,
     but the if/else is there just to be safe.
     """
     if(socket.getdefaulttimeout() != None):
         return socket.getdefaulttimeout()
     else:
         return 'No timeout set'
Example #13
 def testSocketTimeout(self):
     dict = {'logger': self.logger,
             'endpoint': 'https://github.com/dmwm',
             'cacheduration': None,
             'timeout': 10,
             }
     service = Service(dict)
     deftimeout = socket.getdefaulttimeout()
     service.getData('%s/socketresettest' % self.testDir, '/WMCore/blob/master/setup.py#L11')
     assert deftimeout == socket.getdefaulttimeout()
Example #14
 def testPing(self):
     import socket
     oldTimeOut = socket.getdefaulttimeout()
     
     from angel_app.resource.remote.clone import Clone
     cc = Clone("80.219.195.84", 6221)
     assert False == cc.ping()
     
     dd = Clone("localhost")
     assert True == dd.ping(), "Make sure you have a local provider instance running."
     
     assert oldTimeOut == socket.getdefaulttimeout()
Example #15
 def testSocketTimeout(self):
     dict = {'logger': self.logger,
             'endpoint':'http://cmssw.cvs.cern.ch/',
             'cacheduration': None,
             'timeout': 10,
             #'cachepath' : self.cache_path,
             #'req_cache_path': '%s/requests' % self.cache_path
             }
     service = Service(dict)
     deftimeout = socket.getdefaulttimeout()
     service.getData('%s/socketresettest' % self.testDir, '/cgi-bin/cmssw.cgi')
     assert deftimeout == socket.getdefaulttimeout()
Example #16
    def __init__ (self, family = socket.AF_INET, type = socket.SOCK_STREAM, proto = 0, _sock = None,
                  _hub = None):
        """
        Initialize the UV socket

        :param family_or_realsock: a socket descriptor or a socket family
        """
        self.uv_fd = None
        self.uv_handle = None
        self.uv_hub = None
        self.uv_recv_string = ''                    # buffer for receiving data...

        if isinstance(family, (int, long)):
            self.uv_fd = _original_socket(family, type, proto, _sock)
        elif isinstance(family, GreenSocket):
            _sock = family
            self.uv_fd = _sock.uv_fd
            if hasattr(_sock, 'uv_hub') and _sock.uv_hub:
                _hub = _sock.uv_hub
        else:
            _sock = family
            self.uv_fd = _sock

        if not self.uv_hub:
            if _hub:
                self.uv_hub = _hub
            else:
                self.uv_hub = weakref.proxy(get_hub())

        ## check if the socket type is supported by pyUV and we can create a pyUV socket...
        if not self.uv_handle:
            if self.type == socket.SOCK_STREAM:
                self.uv_handle = pyuv.TCP(self.uv_hub.uv_loop)
                self.uv_handle.open(self.fileno())
            elif self.type == socket.SOCK_DGRAM:
                self.uv_handle = pyuv.UDP(self.uv_hub.uv_loop)
                self.uv_handle.open(self.fileno())

        # import timeout from other socket, if it was there
        try:
            self._timeout = self.uv_fd.gettimeout() or socket.getdefaulttimeout()
        except AttributeError:
            self._timeout = socket.getdefaulttimeout()

        assert self.uv_fd, 'the socket descriptor must be not null'

        set_nonblocking(self.uv_fd)

        # when client calls setblocking(0) or settimeout(0) the socket must act non-blocking
        self.act_non_blocking = False
Example #17
def retry_http(tries, backoff=2, on_failure='error'):
    """
    Retry a function or method reading from the internet until no socket or IOError
    is raised
    
    delay sets the initial delay, and backoff sets how much the delay should
    lengthen after each failure. backoff must be greater than 1, or else it
    isn't really a backoff. tries must be at least 0, and delay greater than 0.
    """
    delay = socket.getdefaulttimeout()
    o_delay = socket.getdefaulttimeout()
    if backoff <= 1:
      raise ValueError("backoff must be greater than 1")

    tries = math.floor(tries)
    if tries < 0:
      raise ValueError("tries must be 0 or greater")

    if delay <= 0:
      delay = 15.
      o_delay = 15.
      socket.setdefaulttimeout(delay)
      #raise ValueError("delay must be greater than 0")

    def deco_retry(f):
      def f_retry(*args, **kwargs):
        mtries, mdelay = tries, delay # make mutable
        
        while mtries > 0:
          try:
              rv = f(*args, **kwargs) # Try again
          except IOError,msg:
              rv = False
          except socket.error:
              rv = False
              
          if rv != False: # Done on success
            return rv
          mtries -= 1      # consume an attempt
          socket.setdefaulttimeout(mdelay) # wait...
          mdelay *= backoff  # make future wait longer
          logger.error("URL timeout: %d attempts remaining (delay=%.1fs)"%(mtries,mdelay))
        logger.critical("URL timeout: number of trials exceeded")
        if on_failure=='error':
          raise IOError,msg # Ran out of tries :-(
        else:
          logger.critical("URL Failed, but continuing...")
          return None
      return f_retry
    return deco_retry
Example #18
	def checkTraficLight(self):

		self.activityTimer.callback.remove(self.checkTraficLight)
		self.activityTimer.start(100, False)

		currentTimeoutDefault = socket.getdefaulttimeout()
		socket.setdefaulttimeout(3)
		message = ""
		picon = None
		default = True
		try:
			# TODO: Use Twisted's URL fetcher, urlopen is evil. And it can
			# run in parallel to the package update.
			status = urlopen("http://openpli.org/status/").read().split('!', 1)
			if getBoxType() in status[0].split(','):
				message = len(status) > 1 and status[1] or _("The current beta image might not be stable.\nFor more information see %s.") % ("www.openpli.org")
				picon = MessageBox.TYPE_ERROR
				default = False
		except:
			message = _("The status of the current beta image could not be checked because %s can not be reached.") % ("www.openpli.org")
			picon = MessageBox.TYPE_ERROR
			default = False
		socket.setdefaulttimeout(currentTimeoutDefault)
		if default:
			self.showDisclaimer()
		else:
			message += "\n" + _("Do you want to update your receiver?")
			self.session.openWithCallback(self.startActualUpdate, MessageBox, message, default = default, picon = picon)
Example #19
 def test_http_basic(self):
     self.assertTrue(socket.getdefaulttimeout() is None)
     url = "http://www.python.org"
     with support.transient_internet(url, timeout=None):
         u = _urlopen_with_retry(url)
         self.addCleanup(u.close)
         self.assertTrue(u.fp.raw._sock.gettimeout() is None)
Example #20
    def get_tags():
        socket_to = None
        try:
            socket_to = socket.getdefaulttimeout()
            socket.setdefaulttimeout(EC2.TIMEOUT)
        except Exception:
            pass

        try:
            iam_role = urllib2.urlopen(EC2.URL + "/iam/security-credentials").read().strip()
            iam_params = json.loads(urllib2.urlopen(EC2.URL + "/iam/security-credentials" + "/" + unicode(iam_role)).read().strip())
            from checks.libs.boto.ec2.connection import EC2Connection
            connection = EC2Connection(aws_access_key_id=iam_params['AccessKeyId'], aws_secret_access_key=iam_params['SecretAccessKey'], security_token=iam_params['Token'])
            instance_object = connection.get_only_instances([EC2.metadata['instance-id']])[0]

            EC2_tags = [u"%s:%s" % (tag_key, tag_value) for tag_key, tag_value in instance_object.tags.iteritems()]

        except Exception:
            log.exception("Problem retrieving custom EC2 tags")
            EC2_tags = []

        try:
            if socket_to is None:
                socket_to = 3
            socket.setdefaulttimeout(socket_to)
        except Exception:
            pass

        return EC2_tags
Example #21
def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=None):
    """Open url, optionally retrying if an error is encountered.

    Socket and other IO errors will always be retried if retries > 0.
    HTTP errors are retried if the error code is passed in ``codes``.

    :param retries: Number of times to retry.
    :param codes: HTTP error codes that should be retried.

    """
    attempts = 0
    while True:
        try:
            return urllib2.urlopen(url, timeout=timeout)
        except IOError as e:
            no_retry = isinstance(e, urllib2.HTTPError) and e.code not in codes
            if attempts < retries and not no_retry:
                attempts += 1
                continue
            else:
                try:
                    url_string = url.get_full_url()  # if url is Request obj
                except Exception:
                    url_string = url
                if timeout is None:
                    timeout = socket.getdefaulttimeout()
                log.exception(
                    'Failed after %s retries on url with a timeout of %s: %s: %s',
                    attempts, timeout, url_string, e)
                raise e
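A brief usage sketch for the helper above; the URL and argument values are only illustrative and do not come from the source:

# retry up to 2 times on socket/IO errors and on HTTP 503,
# passing a 5-second timeout through to urllib2.urlopen
response = urlopen("http://example.com/feed.xml", retries=2, codes=(503,), timeout=5)
data = response.read()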
Example #22
def myproxy(url):
	req = urllib2.Request(url)
	try:

		# Important: if the remote server is slow,
		# all our web server threads get stuck here.
		# But this is UGLY, as Python does not provide per-thread
		# or per-socket timeouts through urllib
		orignal_timeout = socket.getdefaulttimeout()
		try:
			socket.setdefaulttimeout(60)

			response = urllib2.urlopen(req)
		finally:
			# restore original timeout
			socket.setdefaulttimeout(orignal_timeout)

		# XXX: How to stream the response through Zope
		# AFAIK - we cannot do it currently

		return response.read()

	except HTTPError, e:
		# Log something more useful than the plain urllib exception,
		# using the Python logging interface
		# http://docs.python.org/library/logging.html
		logger.error("Server did not return HTTP 200 when calling remote proxy URL:" + url)
		for key, value in params.items():
			logger.error(key + ": "  + value)

		# Print the server-side stack trace / error page
		logger.error(e.read())

		raise e
Example #23
    def _call_api(self, api_url, err_env):
        """urlopen(), plus error handling and possible retries.

        err_env is a dict of additional info passed to the error handler
        """
        while True:  # may retry on error
            api_request = urllib2.Request(
                api_url, headers={"Accept-Encoding": "gzip"})

            log.debug("Amazon URL: %s" % api_url)

            try:
                if self.Timeout and sys.version[:3] in ["2.4", "2.5"]:
                    # urllib2.urlopen() doesn't accept timeout until 2.6
                    old_timeout = socket.getdefaulttimeout()
                    try:
                        socket.setdefaulttimeout(self.Timeout)
                        return urllib2.urlopen(api_request)
                    finally:
                        socket.setdefaulttimeout(old_timeout)
                else:
                    # the simple way
                    return urllib2.urlopen(api_request, timeout=self.Timeout)
            except:
                if not self.ErrorHandler:
                    raise

                exception = sys.exc_info()[1]  # works in Python 2 and 3
                err = {'exception': exception}
                err.update(err_env)
                if not self.ErrorHandler(err):
                    raise
Example #24
    def _get_metadata(agentConfig):
        if GCE.metadata is not None:
            return GCE.metadata

        if not agentConfig['collect_instance_metadata']:
            log.info("Instance metadata collection is disabled. Not collecting it.")
            GCE.metadata = {}
            return GCE.metadata

        socket_to = None
        try:
            socket_to = socket.getdefaulttimeout()
            socket.setdefaulttimeout(GCE.TIMEOUT)
        except Exception:
            pass

        try:
            opener = urllib2.build_opener()
            opener.addheaders = [('X-Google-Metadata-Request','True')]
            GCE.metadata = json.loads(opener.open(GCE.URL).read().strip())

        except Exception:
            GCE.metadata = {}

        try:
            if socket_to is None:
                socket_to = 3
            socket.setdefaulttimeout(socket_to)
        except Exception:
            pass
        return GCE.metadata
Example #25
 def _socket_timeout(*args, **kwargs):
     old_timeout = socket.getdefaulttimeout()
     socket.setdefaulttimeout(timeout)
     try:
         return func(*args, **kwargs)
     finally:
         socket.setdefaulttimeout(old_timeout)
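The wrapper above closes over timeout and func, so it is presumably the inner function of a decorator factory. A minimal sketch of that surrounding factory, assuming the name with_socket_timeout and the use of functools.wraps (neither appears in the source):

import functools
import socket

def with_socket_timeout(timeout):
    # hypothetical factory: returns a decorator that runs the wrapped callable
    # with a temporary global default socket timeout, then restores the old one
    def decorator(func):
        @functools.wraps(func)
        def _socket_timeout(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)
        return _socket_timeout
    return decorator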
Example #26
    def __init__(self, family=_socket.AF_INET, type=_socket.SOCK_STREAM, proto=0, _sock=None):

        if _sock is None:
            _sock = _socket_socket(family, type, proto)
        self.__socket = _sock
        self.__socket.setblocking(0)
        self.__timeout = _socket.getdefaulttimeout()
Example #27
@contextlib.contextmanager
def socket_timeout(timeout):
    """Context manager to temporarily set the default socket timeout."""
    old = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(timeout)
        yield
    finally:
        socket.setdefaulttimeout(old)
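A short usage sketch for the context manager above, assuming import contextlib and import socket at module level and that no default timeout was set beforehand; the URL is only illustrative:

from urllib2 import urlopen  # Python 2, matching the other examples
import socket

with socket_timeout(5):
    # sockets created inside the block pick up the 5-second default
    urlopen("http://example.com").read()

# afterwards the previous default (None in this sketch) is back in force
assert socket.getdefaulttimeout() is None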
Example #28
 def __service(self, url, params=None, timeout=50):
     old_timeout = socket.getdefaulttimeout()
     socket.setdefaulttimeout(timeout)
     try:
         # POST
         if params:
             self.logger.debug('post %s params[%s]' % (url, params))
             request = urllib2.Request(url, urllib.urlencode(params))
         # GET
         else:
             self.logger.debug('get %s params[%s]' % (url, params))
             request = urllib2.Request(url)
         request.add_header('Accept-Language', 'zh-cn')
         response = urllib2.urlopen(request)
         content = response.read()
         response.close()
         self.logger.debug('content->%s, code->%d'
                           % (content, response.code))
         if response.code == 200:
             return content, True
         return content, False
     except Exception as ex:
         return str(ex), False
     finally:
         socket.setdefaulttimeout(old_timeout)
Example #29
def gdalurlopen(url):
    timeout = 10
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)

    if 'GDAL_HTTP_PROXY' in os.environ:
        proxy = os.environ['GDAL_HTTP_PROXY']

        if 'GDAL_HTTP_PROXYUSERPWD' in os.environ:
            proxyuserpwd = os.environ['GDAL_HTTP_PROXYUSERPWD']
            proxyHandler = urllib2.ProxyHandler({"http" : \
                "http://%s@%s" % (proxyuserpwd, proxy)})
        else:
            proxyuserpwd = None
            proxyHandler = urllib2.ProxyHandler({"http" : \
                "http://%s" % (proxy)})

        opener = urllib2.build_opener(proxyHandler, urllib2.HTTPHandler)

        urllib2.install_opener(opener)

    try:
        handle = urllib2.urlopen(url)
        socket.setdefaulttimeout(old_timeout)
        return handle
    except urllib2.HTTPError, e:
        print('HTTP service for %s is down (HTTP Error: %d)' % (url, e.code))
        socket.setdefaulttimeout(old_timeout)
        return None
Example #30
    def execute(self):
        if self.ctrl_file_data:
            uploading_kernel = 'kernel' in self.ctrl_file_data
            if uploading_kernel:
                default_timeout = socket.getdefaulttimeout()
                socket.setdefaulttimeout(topic_common.UPLOAD_SOCKET_TIMEOUT)
                print 'Uploading Kernel: this may take a while...',
                sys.stdout.flush()
            try:
                cf_info = self.execute_rpc(op='generate_control_file',
                                           item=self.jobname,
                                           **self.ctrl_file_data)
            finally:
                if uploading_kernel:
                    socket.setdefaulttimeout(default_timeout)

            if uploading_kernel:
                print 'Done'
            self.data['control_file'] = cf_info['control_file']
            if 'synch_count' not in self.data:
                self.data['synch_count'] = cf_info['synch_count']
            if cf_info['is_server']:
                self.data['control_type'] = 'Server'
            else:
                self.data['control_type'] = 'Client'

            # Get the union of the 2 sets of dependencies
            deps = set(self.data['dependencies'])
            deps = sorted(deps.union(cf_info['dependencies']))
            self.data['dependencies'] = list(deps)

        if 'synch_count' not in self.data and 'hostless' not in self.data:
            self.data['synch_count'] = 1

        return self.create_job()
Example #31
def urlopener(url_or_request, log, **kwargs):
    """
    Utility function for pulling back a url, with a retry of 3 times, increasing the timeout, etc.
    Re-raises any errors as URLError.

    .. warning:: This is being replaced by requests library.
                 flexget.utils.requests should be used going forward.

    :param str url_or_request: URL or Request object to get.
    :param log: Logger to log debug info and errors to
    :param kwargs: Keyword arguments to be passed to urlopen
    :return: The file-like object returned by urlopen
    """
    from flexget.utils.requests import is_unresponsive, set_unresponsive

    if isinstance(url_or_request, urllib2.Request):
        url = url_or_request.get_host()
    else:
        url = url_or_request
    if is_unresponsive(url):
        msg = '%s is known to be unresponsive, not trying again.' % urlparse(url).hostname
        log.warning(msg)
        raise urllib2.URLError(msg)

    retries = kwargs.get('retries', 3)
    timeout = kwargs.get('timeout', 15.0)

    # get the old timeout for sockets, so we can set it back to that when done. This is NOT threadsafe by the way.
    # In order to avoid requiring python 2.6, we're not using the urlopen timeout parameter. That really should be used
    # after checking for python 2.6.
    oldtimeout = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(timeout)

        handlers = [SmartRedirectHandler()]
        if urllib2._opener:
            handlers.extend(urllib2._opener.handlers)
        if kwargs.get('handlers'):
            handlers.extend(kwargs['handlers'])
        if len(handlers) > 1:
            handler_names = [h.__class__.__name__ for h in handlers]
            log.debug('Additional handlers have been specified for this urlopen: %s' % ', '.join(handler_names))
        opener = urllib2.build_opener(*handlers).open
        for i in range(retries):  # retry getting the url up to 3 times.
            if i > 0:
                time.sleep(3)
            try:
                retrieved = opener(url_or_request, kwargs.get('data'))
            except urllib2.HTTPError as e:
                if e.code < 500:
                    # If it was not a server error, don't keep retrying.
                    log.warning('Could not retrieve url (HTTP %s error): %s' % (e.code, e.url))
                    raise
                log.debug('HTTP error (try %i/%i): %s' % (i + 1, retries, e.code))
            except (urllib2.URLError, socket.timeout) as e:
                if hasattr(e, 'reason'):
                    reason = str(e.reason)
                else:
                    reason = 'N/A'
                if reason == 'timed out':
                    set_unresponsive(url)
                log.debug('Failed to retrieve url (try %i/%i): %s' % (i + 1, retries, reason))
            except httplib.IncompleteRead as e:
                log.critical('Incomplete read - see python bug 6312')
                break
            else:
                # make the returned instance usable in a with statement by adding __enter__ and __exit__ methods

                def enter(self):
                    return self

                def exit(self, exc_type, exc_val, exc_tb):
                    self.close()

                retrieved.__class__.__enter__ = enter
                retrieved.__class__.__exit__ = exit
                return retrieved

        log.warning('Could not retrieve url: %s' % url_or_request)
        raise urllib2.URLError('Could not retrieve url after %s tries.' % retries)
    finally:
        socket.setdefaulttimeout(oldtimeout)
Example #32
    import socket
    import urllib2
    import threading
    import sys
    import Queue

    socket.setdefaulttimeout(7)

    print("Bobng's proxy checker. Using %s second timeout"%(socket.getdefaulttimeout()))

    #input_file = sys.argv[1]
    #proxy_type = sys.argv[2] #options: http,s4,s5
    #output_file = sys.argv[3]
    input_file = 'proxylist.txt'
    proxy_type = 'http'
    output_file = 'proxy_alive.txt'

    url = "www.seemyip.com" # Don't put http:// in here, or any /'s

    check_queue = Queue.Queue()
    output_queue = Queue.Queue()
    threads = 20

    def writer(f,rq):
        while True:
            line = rq.get()
            f.write(line+'\n')

    def checker(q,oq):
        while True:
Example #33
def downloadpage(url, post=None, headers=None, timeout=None, follow_redirects=True, cookies=True, replace_headers=False,
                 add_referer=False, only_headers=False, bypass_cloudflare=True, count_retries=0, count_retries_tot=5, random_headers=False, ignore_response_code=False, alfa_s=False, proxy=True, proxy_web=False, proxy_addr_forced=None,forced_proxy=None, proxy_retries=1):
    """
    Opens a url and returns the data obtained

    @param url: url to open.
    @type url: str
    @param post: If it contains a value, it is sent via POST.
    @type post: str
    @param headers: Headers for the request; if empty, the default headers are used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Indicates whether redirects should be followed.
    @type follow_redirects: bool
    @param cookies: Indicates whether cookies should be used.
    @type cookies: bool
    @param replace_headers: If True, the headers passed in the "headers" parameter completely replace the default headers.
                            If False, the headers passed in the "headers" parameter modify the default headers.
    @type replace_headers: bool
    @param add_referer: Indicates whether the "Referer" header should be added, using the domain of the url as its value.
    @type add_referer: bool
    @param only_headers: If True, only the headers are downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, a random selection of headers is used.
    @type random_headers: bool
    @param ignore_response_code: If True, the WebErrorException handling is skipped for errors such as 404 on veseriesonline when the data is still usable.
    @type ignore_response_code: bool
    @return: Result of the request
    @rtype: HTTPResponse

            Field                   Type   Description
            ----------------------------------------------------------------------------------------------------------------
            HTTPResponse.sucess:    bool   True: request completed successfully | False: an error occurred during the request
            HTTPResponse.code:      int    Server response code, or error code if an error occurred
            HTTPResponse.error:     str    Error description, if an error occurred
            HTTPResponse.headers:   dict   Dictionary with the server's response headers
            HTTPResponse.data:      str    Response received from the server
            HTTPResponse.time:      float  Time taken to perform the request

    """

    response = {}

    # Default headers, if nothing is specified
    request_headers = default_headers.copy()

    # Headers passed as parameters
    if headers is not None:
        if not replace_headers:
            request_headers.update(dict(headers))
        else:
            request_headers = dict(headers)

    if add_referer:
        request_headers["Referer"] = "/".join(url.split("/")[:3])
        
    if random_headers or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        request_headers['User-Agent'] = random_useragent()

    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    # If the download has to go through a Proxy or ProxyWeb service, prepare the url
    proxy_retries_counter = 0
    url_save = url
    post_save = post
    while proxy_retries_counter <= proxy_retries:
        # Handlers init
        handlers = [urllib2.HTTPHandler(debuglevel=False)]
        
        proxy_retries_counter += 1
        proxy_stat = ''
        proxy_addr = ''
        proxy_CF_addr = ''
        proxy_web_name = ''
        proxy_log = ''
        
        try:
            if (proxy or proxy_web) and (forced_proxy or proxy_addr_forced or channel_proxy_list(url, forced_proxy=forced_proxy)):
                import proxytools
                proxy_addr, proxy_CF_addr, proxy_web_name, proxy_log = proxytools.get_proxy_addr(url, post=post, forced_proxy=forced_proxy)
                if proxy_addr_forced and proxy_log:
                    import scrapertools
                    proxy_log = scrapertools.find_single_match(str(proxy_addr_forced), "{'http.*':\s*'(.*?)'}")
            
                if proxy and proxy_addr:
                    if proxy_addr_forced: proxy_addr = proxy_addr_forced
                    handlers.append(urllib2.ProxyHandler(proxy_addr))
                    proxy_stat = ', Proxy Direct ' + proxy_log
                elif proxy and proxy_CF_addr:
                    if proxy_addr_forced: proxy_CF_addr = proxy_addr_forced
                    handlers.append(urllib2.ProxyHandler(proxy_CF_addr))
                    proxy_stat = ', Proxy CF ' + proxy_log
                elif proxy and proxy_addr_forced:
                    proxy_addr = proxy_addr_forced
                    handlers.append(urllib2.ProxyHandler(proxy_addr))
                    proxy_stat = ', Proxy Direct ' + proxy_log
                elif proxy and not proxy_addr and not proxy_CF_addr and not proxy_addr_forced:
                    proxy = False
                    if not proxy_web_name:
                        proxy_addr, proxy_CF_addr, proxy_web_name, proxy_log = proxytools.get_proxy_addr(url, forced_proxy='Total')
                    if proxy_web_name:
                        proxy_web = True
                    else:
                        proxy_web = False
                        if proxy_addr:
                            proxy = True
                            handlers.append(urllib2.ProxyHandler(proxy_addr))
                            proxy_stat = ', Proxy Direct ' + proxy_log

                if proxy_web and proxy_web_name:
                    if post: proxy_log = '(POST) ' + proxy_log
                    url, post, headers_proxy, proxy_web_name = proxytools.set_proxy_web(url, proxy_web_name, post=post)
                    if proxy_web_name:
                        proxy_stat = ', Proxy Web ' + proxy_log
                        if headers_proxy:
                            request_headers.update(dict(headers_proxy))
                if proxy_web and not proxy_web_name:
                    proxy_web = False
                    proxy_addr, proxy_CF_addr, proxy_web_name, proxy_log = proxytools.get_proxy_addr(url, forced_proxy='Total')
                    if proxy_CF_addr:
                        proxy = True
                        handlers.append(urllib2.ProxyHandler(proxy_CF_addr))
                        proxy_stat = ', Proxy CF ' + proxy_log
                    elif proxy_addr:
                        proxy = True
                        handlers.append(urllib2.ProxyHandler(proxy_addr))
                        proxy_stat = ', Proxy Direct ' + proxy_log
        except:
            import traceback
            logger.error(traceback.format_exc())
            proxy = ''
            proxy_web = ''
            proxy_stat = ''
            proxy_addr = ''
            proxy_CF_addr = ''
            proxy_web_name = ''
            proxy_log = ''
            url = url_save
            
        # Limit the download time if no timeout was passed and a value is set in the global variable
        if timeout is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None: timeout = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if timeout == 0: timeout = None

        if not alfa_s:
            logger.info("----------------------------------------------")
            logger.info("downloadpage Alfa: %s" %__version)
            logger.info("----------------------------------------------")
            logger.info("Timeout: %s" % timeout)
            logger.info("URL: " + url)
            logger.info("Dominio: " + urlparse.urlparse(url)[1])
            if post:
                logger.info("Peticion: POST" + proxy_stat)
            else:
                logger.info("Peticion: GET" + proxy_stat)
                logger.info("Usar Cookies: %s" % cookies)
                logger.info("Descargar Pagina: %s" % (not only_headers))
                logger.info("Fichero de Cookies: " + ficherocookies)
            logger.info("Headers:")
            for header in request_headers:
                logger.info("- %s: %s" % (header, request_headers[header]))

        # Handlers
        if not follow_redirects:
            handlers.append(NoRedirectHandler())

        if cookies:
            handlers.append(urllib2.HTTPCookieProcessor(cj))

        opener = urllib2.build_opener(*handlers)

        if not alfa_s:
            logger.info("Realizando Peticion")

        # Counter
        inicio = time.time()

        req = urllib2.Request(url, post, request_headers)

        try:
            if urllib2.__version__ == "2.4":
                import socket
                deftimeout = socket.getdefaulttimeout()
                if timeout is not None:
                    socket.setdefaulttimeout(timeout)
                handle = opener.open(req)
                socket.setdefaulttimeout(deftimeout)
            else:
                handle = opener.open(req, timeout=timeout)

        except urllib2.HTTPError, handle:
            response["sucess"] = False
            response["code"] = handle.code
            response["error"] = handle.__dict__.get("reason", str(handle))
            response["headers"] = handle.headers.dict
            if not only_headers:
                response["data"] = handle.read()
            else:
                response["data"] = ""
            response["time"] = time.time() - inicio
            response["url"] = handle.geturl()

        except Exception, e:
            response["sucess"] = False
            response["code"] = e.__dict__.get("errno", e.__dict__.get("code", str(e)))
            response["error"] = e.__dict__.get("reason", str(e))
            response["headers"] = {}
            response["data"] = ""
            response["time"] = time.time() - inicio
            response["url"] = url
Example #34
"""
RssReader combines an RSS parser, feed url management, and timed updates,
with the option of adding observers to get notification of changes
"""

import threading
import calendar
import time

try:
    from email.utils import parsedate  # for parsing dates
except ImportError:
    from email.Utils import parsedate  # for parsing dates

import socket
Timeout = socket.getdefaulttimeout()
import feedparser
import urllib2
from AccessGrid.Preferences import Preferences
from AccessGrid import Utilities
from AccessGrid import Log

# work around apparent bug in socket.setdefaulttimeout,
# which is used in feedparser
socket.setdefaulttimeout(Timeout)


def strtimeToSecs(strtime):
    """
    Convert a time string to seconds since the epoch
    
Example #35
 def __init__(self, timeout=None):
     self.old_timeout = socket.getdefaulttimeout()
     self.timeout = timeout
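Only the constructor is shown; the saved old_timeout suggests this class is a context manager. A hedged completion of the missing methods (an assumption, not taken from the source):

 def __enter__(self):
     # apply the temporary default timeout
     socket.setdefaulttimeout(self.timeout)
     return self

 def __exit__(self, exc_type, exc_val, exc_tb):
     # always restore the previously saved default
     socket.setdefaulttimeout(self.old_timeout)
     return False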
Example #36
 def test_http_basic(self):
     self.assertIsNone(socket.getdefaulttimeout())
     url = "http://www.example.com"
     with test_support.transient_internet(url, timeout=None):
         u = _urlopen_with_retry(url)
         self.assertIsNone(u.fp._sock.fp._sock.gettimeout())
Example #37
    def _make_request(self,
                      conn,
                      method,
                      url,
                      timeout=_Default,
                      **httplib_request_kw):
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param timeout:
            Socket timeout in seconds for the request. This can be a
            float or integer, which will set the same timeout value for
            the socket connect and the socket read, or an instance of
            :class:`urllib3.util.Timeout`, which gives you more fine-grained
            control over your timeouts.
        """
        self.num_requests += 1

        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = timeout_obj.connect_timeout

        # Trigger any extra validation we need to do.
        try:
            self._validate_conn(conn)
        except (SocketTimeout, BaseSSLError) as e:
            # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
            self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
            raise

        # conn.request() calls httplib.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        conn.request(method, url, **httplib_request_kw)

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        # App Engine doesn't have a sock attr
        if getattr(conn, 'sock', None):
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url,
                    "Read timed out. (read timeout=%s)" % read_timeout)
            if read_timeout is Timeout.DEFAULT_TIMEOUT:
                conn.sock.settimeout(socket.getdefaulttimeout())
            else:  # None or a value
                conn.sock.settimeout(read_timeout)

        # Receive the response from the server
        try:
            try:  # Python 2.7, use buffering of HTTP responses
                httplib_response = conn.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                httplib_response = conn.getresponse()
        except (SocketTimeout, BaseSSLError, SocketError) as e:
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # AppEngine doesn't have a version attr.
        http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
        log.debug("\"%s %s %s\" %s %s" %
                  (method, url, http_version, httplib_response.status,
                   httplib_response.length))
        return httplib_response
Example #38
 def test_ftp_basic(self):
     self.assertTrue(socket.getdefaulttimeout() is None)
     u = _urlopen_with_retry(self.FTP_HOST)
     self.assertTrue(u.fp.fp._sock.gettimeout() is None)
Example #39
 def test_http_basic(self):
     self.assertTrue(socket.getdefaulttimeout() is None)
     u = _urlopen_with_retry("http://www.python.org")
     self.assertTrue(u.fp._sock.fp._sock.gettimeout() is None)
Example #40
    def Send(self,
             request_path,
             payload="",
             content_type="application/octet-stream",
             timeout=None,
             **kwargs):
        """Sends an RPC and returns the response.

    Args:
      request_path: The path to send the request to, eg /api/appversion/create.
      payload: The body of the request, or None to send an empty request.
      content_type: The Content-Type header to use.
      timeout: timeout in seconds; default None i.e. no timeout.
        (Note: for large requests on OS X, the timeout doesn't work right.)
      kwargs: Any keyword arguments are converted into query string parameters.

    Returns:
      The response body, as a string.
    """
        # We get the auth domain to know if we're uploading to Google or to
        # Appscale.
        auth_domain = ''
        if 'AUTH_DOMAIN' in os.environ:
            auth_domain = os.environ['AUTH_DOMAIN'].lower()

        old_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(timeout)
        try:
            tries = 0
            while True:
                tries += 1
                if auth_domain == "appscale":
                    self._LoadAppScaleCookie()

                url = "%s://%s%s" % (self.scheme, self.host, request_path)
                if kwargs:

                    url += "?" + urllib.urlencode(sorted(kwargs.items()))
                req = self._CreateRequest(url=url, data=payload)
                req.add_header("Content-Type", content_type)

                req.add_header("X-appcfg-api-version", "1")

                try:
                    logger.debug(
                        'Sending %s request:\n%s', self.scheme.upper(),
                        HttpRequestToString(req, include_data=self.debug_data))
                    f = self.opener.open(req)
                    response = f.read()
                    f.close()

                    return response
                except urllib2.HTTPError, e:
                    logger.debug("Got http error, this is try #%s", tries)

                    if tries > self.rpc_tries:
                        raise AppScaleAuthenticationError("Unable to authenticate " + \
                                                          "with AppScale.")

                    # App Load Balancer returns HTTP 502 if invalid cookie
                    # is used for authentication, though ideally it should throw
                    # HTTP 401 only. But handling HTTP 502 here until the App
                    # Load Balancer code is fixed.
                    if e.code == 401 or e.code == 502:
                        if auth_domain == 'appscale':
                            self._AppScaleAuthenticate()
                        else:
                            self._Authenticate()
                    elif e.code >= 500 and e.code < 600:
                        continue

                    elif e.code == 302:
                        if tries >= 2:
                            if auth_domain == 'appscale':
                                logger.info("Deleting authentication cookie : %s" % \
                                            self.cookie_jar.filename)
                                if os.path.isfile(self.cookie_jar.filename):
                                    os.remove(self.cookie_jar.filename)
                                raise AppScaleAuthenticationError("Could not " + \
                                      "authenticate with AppScale. Wrong username/password.")
                            else:
                                raise
                        loc = e.info()["location"]
                        logger.debug("Got 302 redirect. Location: %s", loc)
                        if loc.startswith(
                                "https://www.google.com/accounts/ServiceLogin"
                        ):
                            self._Authenticate()
                        elif re.match(
                                r"https://www.google.com/a/[a-z0-9.-]+/ServiceLogin",
                                loc):
                            self.account_type = os.getenv(
                                "APPENGINE_RPC_HOSTED_LOGIN_TYPE", "HOSTED")
                            self._Authenticate()
                        elif auth_domain == 'appscale':
                            self._AppScaleAuthenticate()
                        elif loc.startswith("http://%s/_ah/login" %
                                            (self.host, )):
                            self._DevAppServerAuthenticate()
                        else:
                            raise
                    elif e.code == 403:
                        if auth_domain == 'appscale':
                            logger.info("Deleting authentication cookie : %s" % \
                                        self.cookie_jar.filename)
                            os.remove(self.cookie_jar.filename)
                            raise AppScaleAuthenticationError("Could not authenticate " + \
                                                               "with AppScale.")
                        else:
                            raise
                    else:
                        raise
        finally:
            socket.setdefaulttimeout(old_timeout)
Example #41
 def setUpClass(cls):
     cls.original_socket_default_timeout = socket.getdefaulttimeout()
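The matching teardown is not shown; a minimal sketch of what it presumably looks like, restoring the value saved above:

 @classmethod
 def tearDownClass(cls):
     socket.setdefaulttimeout(cls.original_socket_default_timeout)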
Example #42
def socket_getdefaulttimeout():
    return socket.getdefaulttimeout()
Example #43
    def _make_request(self,
                      conn,
                      method,
                      url,
                      timeout=_Default,
                      chunked=False,
                      **httplib_request_kw):
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param timeout:
            Socket timeout in seconds for the request. This can be a
            float or integer, which will set the same timeout value for
            the socket connect and the socket read, or an instance of
            :class:`urllib3.util.Timeout`, which gives you more fine-grained
            control over your timeouts.
        """
        self.num_requests += 1

        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = timeout_obj.connect_timeout

        # Trigger any extra validation we need to do.
        try:
            self._validate_conn(conn)
        except (SocketTimeout, BaseSSLError) as e:
            # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
            self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
            raise

        # conn.request() calls httplib.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        if chunked:
            conn.request_chunked(method, url, **httplib_request_kw)
        else:
            conn.request(method, url, **httplib_request_kw)

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        # App Engine doesn't have a sock attr
        if getattr(conn, 'sock', None):
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url,
                    "Read timed out. (read timeout=%s)" % read_timeout)
            if read_timeout is Timeout.DEFAULT_TIMEOUT:
                conn.sock.settimeout(socket.getdefaulttimeout())
            else:  # None or a value
                conn.sock.settimeout(read_timeout)

        # Receive the response from the server
        try:
            try:
                # Python 2.7, use buffering of HTTP responses
                httplib_response = conn.getresponse(buffering=True)
            except TypeError:
                # Python 3
                try:
                    httplib_response = conn.getresponse()
                except Exception as e:
                    # Remove the TypeError from the exception chain in Python 3;
                    # otherwise it looks like a programming error was the cause.
                    six.raise_from(e, None)
        except (SocketTimeout, BaseSSLError, SocketError) as e:
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # AppEngine doesn't have a version attr.
        http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
        log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host,
                  self.port, method, url, http_version,
                  httplib_response.status, httplib_response.length)

        try:
            assert_header_parsing(httplib_response.msg)
        except (HeaderParsingError,
                TypeError) as hpe:  # Platform-specific: Python 3
            log.warning('Failed to parse headers (url=%s): %s',
                        self._absolute_url(url),
                        hpe,
                        exc_info=True)

        return httplib_response
Example #44
 def test_ftp_basic(self):
     self.assertIsNone(socket.getdefaulttimeout())
     with test_support.transient_internet(self.FTP_HOST, timeout=None):
         u = _urlopen_with_retry(self.FTP_HOST)
         self.assertIsNone(u.fp.fp._sock.gettimeout())
Example #45
@contextlib.contextmanager
def default_socket_timeout(timeout):
    """Context manager temporarily setting the default socket timeout."""
    prev = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        yield
    finally:
        socket.setdefaulttimeout(prev)
Example #46
  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL over network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL_ERROR in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
    """
    last_protocol = ''
    last_host = ''
    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')

    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed







      port = urllib.splitport(urllib.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.error(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))





        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:

        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)




      if not host and not protocol:
        host = last_host
        protocol = last_protocol









      adjusted_headers = {
          'User-Agent':
          [('AppEngine-Google; (+http://code.google.com/appengine; appid: %s)'
            % os.getenv('APPLICATION_ID'))],
          'Host': [host],
          'Accept-Encoding': ['gzip'],
      }
      if payload is not None:


        adjusted_headers['Content-Length'] = [str(len(payload))]


      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = [
            'application/x-www-form-urlencoded']

      passthrough_content_encoding = False
      for header in headers:



        header_key = header.key()
        if header_key.lower() == 'user-agent':
          adjusted_headers[header_key.title()] = [(
              '%s %s' % (header.value(), adjusted_headers['User-Agent'][0]))]
        elif header_key.lower() == 'accept-encoding':
          passthrough_content_encoding = True
          adjusted_headers[header_key.title()] = [header.value()]
        elif header_key.lower() == 'content-type':
          adjusted_headers[header_key.title()] = [header.value()]
        else:
          adjusted_headers.setdefault(header_key, []).append(header.value())

      if payload is not None:
        escaped_payload = payload.encode('string_escape')
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        proxy_host = None

        if protocol == 'http':
          connection_class = httplib.HTTPConnection
          default_port = 80

          if os.environ.get('HTTP_PROXY') and not _IsLocalhost(host):
            _, proxy_host, _, _, _ = (
                urlparse.urlsplit(os.environ.get('HTTP_PROXY')))
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):

            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = httplib.HTTPSConnection

          default_port = 443

          if (_CONNECTION_SUPPORTS_SSL_TUNNEL and
              os.environ.get('HTTPS_PROXY') and not _IsLocalhost(host)):
            _, proxy_host, _, _, _ = (
                urlparse.urlsplit(os.environ.get('HTTPS_PROXY')))
        else:

          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)






        connection_kwargs = (
            {'timeout': deadline} if _CONNECTION_SUPPORTS_TIMEOUT else {})

        if proxy_host:
          proxy_address, _, proxy_port = proxy_host.partition(':')
          connection = connection_class(
              proxy_address, proxy_port if proxy_port else default_port,
              **connection_kwargs)
          full_path = urlparse.urlunsplit((protocol, host, path, query, ''))

          if protocol == 'https':
            connection.set_tunnel(host)
        else:
          connection = connection_class(host, **connection_kwargs)
          full_path = urlparse.urlunsplit(('', '', path, query, ''))



        last_protocol = protocol
        last_host = host

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:


            socket.setdefaulttimeout(deadline)
          _SendRequest(connection, method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except _fancy_urllib_InvalidCertException, e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
      except _fancy_urllib_SSLError, e:





        app_error = (
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
            if 'timed out' in e.message else
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
        raise apiproxy_errors.ApplicationError(app_error, str(e))
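When _CONNECTION_SUPPORTS_TIMEOUT is false, the code above applies the deadline by temporarily replacing the process-wide default timeout and restoring it in a finally block. A stripped-down sketch of that fallback outside the App Engine SDK (Python 3 names; host, path and deadline are caller-supplied):

import socket
import http.client

def fetch_with_deadline(host, path, deadline):
    # Fallback for connection classes that cannot take a per-connection
    # timeout: make the deadline the global default for the duration of
    # one request, then restore whatever was there before.
    orig_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(deadline)
    conn = http.client.HTTPConnection(host)
    try:
        conn.request("GET", path)
        return conn.getresponse().read()
    finally:
        socket.setdefaulttimeout(orig_timeout)
        conn.close()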
Example #47
0
 def _f(*args, **kwargs):
   # Apply the instance's timeout for the duration of the wrapped call and
   # restore the previous default even if f() raises.
   orig_timeout = socket.getdefaulttimeout()
   socket.setdefaulttimeout(args[0].__timeout)
   try:
     result = f(*args, **kwargs)
   finally:
     socket.setdefaulttimeout(orig_timeout)
   return result
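The wrapper above is presumably the inner function of a timeout decorator; a hypothetical, self-contained version of such a decorator (the _timeout attribute name is an assumption for illustration):

import functools
import socket

def with_instance_timeout(f):
    # Run a method with the instance's _timeout as the global socket default,
    # restoring the previous default afterwards.
    @functools.wraps(f)
    def _f(self, *args, **kwargs):
        orig_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self._timeout)  # assumed attribute name
        try:
            return f(self, *args, **kwargs)
        finally:
            socket.setdefaulttimeout(orig_timeout)
    return _f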
Example #48
0
 def test_ftp_basic(self):
     self.assertIsNone(socket.getdefaulttimeout())
     with socket_helper.transient_internet(self.FTP_HOST, timeout=None):
         u = _urlopen_with_retry(self.FTP_HOST)
         self.addCleanup(u.close)
         self.assertIsNone(u.fp.fp.raw._sock.gettimeout())
Example #49
0
@contextlib.contextmanager
def transient_internet(resource_name, timeout=30.0, errnos=()):
    """Return a context manager that raises ResourceDenied when various issues
    with the Internet connection manifest themselves as exceptions."""
    default_errnos = [
        ('ECONNREFUSED', 111),
        ('ECONNRESET', 104),
        ('EHOSTUNREACH', 113),
        ('ENETUNREACH', 101),
        ('ETIMEDOUT', 110),
    ]
    default_gai_errnos = [
        ('EAI_AGAIN', -3),
        ('EAI_FAIL', -4),
        ('EAI_NONAME', -2),
        ('EAI_NODATA', -5),
        # Windows defines EAI_NODATA as 11001 but idiotic getaddrinfo()
        # implementation actually returns WSANO_DATA i.e. 11004.
        ('WSANO_DATA', 11004),
    ]

    denied = ResourceDenied("Resource '%s' is not available" % resource_name)
    captured_errnos = errnos
    gai_errnos = []
    if not captured_errnos:
        captured_errnos = [
            getattr(errno, name, num) for (name, num) in default_errnos
        ]
        gai_errnos = [
            getattr(socket, name, num) for (name, num) in default_gai_errnos
        ]

    def filter_error(err):
        n = getattr(err, 'errno', None)
        if (isinstance(err, socket.timeout)
                or (isinstance(err, socket.gaierror) and n in gai_errnos)
                or n in captured_errnos):
            if not verbose:
                sys.stderr.write(denied.args[0] + "\n")
            raise denied

    old_timeout = socket.getdefaulttimeout()
    try:
        if timeout is not None:
            socket.setdefaulttimeout(timeout)
        yield
    except IOError as err:
        # urllib can wrap original socket errors multiple times (!), we must
        # unwrap to get at the original error.
        while True:
            a = err.args
            if len(a) >= 1 and isinstance(a[0], IOError):
                err = a[0]
            # The error can also be wrapped as args[1]:
            #    except socket.error as msg:
            #        raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
            elif len(a) >= 2 and isinstance(a[1], IOError):
                err = a[1]
            else:
                break
        filter_error(err)
        raise
    # XXX should we catch generic exceptions and look for their
    # __cause__ or __context__?
    finally:
        socket.setdefaulttimeout(old_timeout)
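A usage sketch for the helper above (assuming, as in the stdlib test support code, that it is decorated with @contextlib.contextmanager and that verbose and ResourceDenied come from the surrounding test module):

import urllib.request

# Connection problems inside the block surface as ResourceDenied instead of
# failing the test; the default socket timeout is raised to 30 s meanwhile.
with transient_internet("www.example.com"):
    resp = urllib.request.urlopen("http://www.example.com/")
    data = resp.read()
    resp.close()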
Example #50
0
        def __call__(self, *args, **kw):
            if self._name == "_":
                if self.__name in ["__repr__", "__str__"]:
                    return self.__repr__()
            else:
                chain = []
                parent = self._parent
                while parent._parent:
                    chain = [parent._name] + chain
                    parent = parent._parent
                url = parent._icontrol_url % parent.__dict__
                ns = parent._icontrol_ns + ':' + '/'.join(chain)
                if parent._url_params:
                    url = "%s?%s" % (
                        url, urllib.parse.urlencode(parent._url_params))
                    parent._cache.clear()

                p = parent
                if p._cache.get(ns) is not None:
                    ic = p._cache[ns]
                else:
                    if parent._session:
                        headers = SOAPpy.Types.headerType()
                        sess_t = SOAPpy.Types.integerType(parent._session)
                        sess_t._setMustUnderstand(0)
                        sess_t._setAttr('xmlns:myns1', parent._icontrol_ns)
                        headers._addItem('myns1:session', sess_t)
                        ic = SOAPpy.SOAPProxy(url,
                                              ns,
                                              header=headers,
                                              timeout=p.timeout)
                    else:
                        ic = SOAPpy.SOAPProxy(url, ns, timeout=p.timeout)
                    p._cache[ns] = ic
                    #ic.config.debug = p._debug
                    ic.simplify_objects = 1

                try:
                    # An ugly way of setting the timeout per socket, but it
                    # seems that SOAPpy is ignoring the timeout parameter set in
                    # the SOAPProxy constructor.
                    before = socket.getdefaulttimeout()
                    socket.setdefaulttimeout(p.timeout)
                    if p._debug:
                        LOG.debug(
                            "%s -> %s.%s(%s)", url, '.'.join(chain),
                            self._name, ', '.join([
                                '%s=%s' % (x, y) for x, y in list(kw.items())
                            ]))
                    ret = getattr(ic, self._name)(*args, **kw)
                    if p._debug:
                        LOG.debug(ret)
                    return ret
                except SOAPpy.Types.faultType as e:
                    if 'Unknown method' in e.faultstring:
                        raise UnknownMethod(e)
                    raise IControlFault(e)
                except SOAPpy.Errors.HTTPError as e:
                    if 401 == e.code:
                        raise AuthFailed(e)
                    raise IControlTransportError(e)
                finally:
                    socket.setdefaulttimeout(before)
Example #51
0
    def ipkgCallback(self, event, param):
        if event == IpkgComponent.EVENT_DOWNLOAD:
            self.status.setText(_("Downloading"))
        elif event == IpkgComponent.EVENT_UPGRADE:
            if self.sliderPackages.has_key(param):
                self.slider.setValue(self.sliderPackages[param])
            self.package.setText(param)
            self.status.setText(
                _("Upgrading") + ": %s/%s" %
                (self.packages, self.total_packages))
            if not param in self.processed_packages:
                self.processed_packages.append(param)
                self.packages += 1
        elif event == IpkgComponent.EVENT_INSTALL:
            self.package.setText(param)
            self.status.setText(_("Installing"))
            if not param in self.processed_packages:
                self.processed_packages.append(param)
                self.packages += 1
        elif event == IpkgComponent.EVENT_REMOVE:
            self.package.setText(param)
            self.status.setText(_("Removing"))
            if not param in self.processed_packages:
                self.processed_packages.append(param)
                self.packages += 1
        elif event == IpkgComponent.EVENT_CONFIGURING:
            self.package.setText(param)
            self.status.setText(_("Configuring"))

        elif event == IpkgComponent.EVENT_MODIFIED:
            if config.plugins.softwaremanager.overwriteConfigFiles.getValue(
            ) in ("N", "Y"):
                self.ipkg.write(True and config.plugins.softwaremanager.
                                overwriteConfigFiles.getValue())
            else:
                self.session.openWithCallback(
                    self.modificationCallback, MessageBox,
                    _("A configuration file (%s) has been modified since it was installed.\nDo you want to keep your modifications?"
                      ) % param)
        elif event == IpkgComponent.EVENT_ERROR:
            self.error += 1
        elif event == IpkgComponent.EVENT_DONE:
            if self.updating:
                self.updating = False
                self.ipkg.startCmd(IpkgComponent.CMD_UPGRADE_LIST)
            elif self.ipkg.currentCommand == IpkgComponent.CMD_UPGRADE_LIST:
                from urllib import urlopen
                import socket
                currentTimeoutDefault = socket.getdefaulttimeout()
                socket.setdefaulttimeout(3)
                try:
                    config.softwareupdate.updateisunstable.setValue(
                        urlopen(
                            "http://enigma2.world-of-satellite.com/feeds/" +
                            getImageVersion() + "/status").read())
                except:
                    config.softwareupdate.updateisunstable.setValue('1')
                socket.setdefaulttimeout(currentTimeoutDefault)
                self.total_packages = None
                if config.softwareupdate.updateisunstable.getValue(
                ) == '1' and config.softwareupdate.updatebeta.getValue():
                    self.total_packages = len(self.ipkg.getFetchedList())
                    message = _(
                        "The current update may be unstable") + "\n" + _(
                            "Are you sure you want to update your %s %s ?"
                        ) % (getMachineBrand(), getMachineName()) + "\n(" + (
                            ngettext("%s updated package available",
                                     "%s updated packages available",
                                     self.total_packages) %
                            self.total_packages) + ")"
                elif config.softwareupdate.updateisunstable.getValue() == '0':
                    self.total_packages = len(self.ipkg.getFetchedList())
                    message = _("Do you want to update your %s %s ?") % (
                        getMachineBrand(), getMachineName()) + "\n(" + (
                            ngettext("%s updated package available",
                                     "%s updated packages available",
                                     self.total_packages) %
                            self.total_packages) + ")"
                if self.total_packages:
                    global ocram
                    for package_tmp in self.ipkg.getFetchedList():
                        if package_tmp[0].startswith(
                                'enigma2-plugin-picons-tv-ocram'):
                            ocram = ocram + '[ocram-picons] ' + package_tmp[
                                0].split('enigma2-plugin-picons-tv-ocram.')[
                                    1] + 'updated ' + package_tmp[2] + '\n'
                        elif package_tmp[0].startswith(
                                'enigma2-plugin-settings-ocram'):
                            ocram = ocram + '[ocram-settings] ' + package_tmp[
                                0].split('enigma2-plugin-picons-tv-ocram.')[
                                    1] + 'updated ' + package_tmp[2] + '\n'
                    config.softwareupdate.updatefound.setValue(True)
                    choices = [(_("View the changes"), "changes"),
                               (_("Upgrade and reboot system"), "cold")]
                    if path.exists(
                            "/usr/lib/enigma2/python/Plugins/SystemPlugins/ViX/BackupManager.pyo"
                    ):
                        if not config.softwareupdate.autosettingsbackup.getValue(
                        ) and config.backupmanager.backuplocation.getValue():
                            choices.append(
                                (_("Perform a settings backup,") + '\n\t' +
                                 _("making a backup before updating") +
                                 '\n\t' + _("is strongly advised."), "backup"))
                        if not config.softwareupdate.autoimagebackup.getValue(
                        ) and config.imagemanager.backuplocation.getValue():
                            choices.append((_("Perform a full image backup"),
                                            "imagebackup"))
                    choices.append((_("Update channel list only"), "channels"))
                    choices.append((_("Cancel"), ""))
                    upgrademessage = self.session.openWithCallback(
                        self.startActualUpgrade,
                        ChoiceBox,
                        title=message,
                        list=choices,
                        skin_name="SoftwareUpdateChoices")
                    upgrademessage.setTitle(_('Software update'))
                else:
                    upgrademessage = self.session.openWithCallback(
                        self.close,
                        MessageBox,
                        _("Nothing to upgrade"),
                        type=MessageBox.TYPE_INFO,
                        timeout=10,
                        close_on_any_key=True)
                    upgrademessage.setTitle(_('Software update'))
            elif self.channellist_only > 0:
                if self.channellist_only == 1:
                    self.setEndMessage(
                        _("Could not find installed channel list."))
                elif self.channellist_only == 2:
                    self.slider.setValue(2)
                    self.ipkg.startCmd(IpkgComponent.CMD_REMOVE,
                                       {'package': self.channellist_name})
                    self.channellist_only += 1
                elif self.channellist_only == 3:
                    self.slider.setValue(3)
                    self.ipkg.startCmd(IpkgComponent.CMD_INSTALL,
                                       {'package': self.channellist_name})
                    self.channellist_only += 1
                elif self.channellist_only == 4:
                    self.showUpdateCompletedMessage()
                    eDVBDB.getInstance().reloadBouquets()
                    eDVBDB.getInstance().reloadServicelist()
            elif self.error == 0:
                self.showUpdateCompletedMessage()
            else:
                self.activityTimer.stop()
                self.activityslider.setValue(0)
                error = _(
                    "Your %s %s might be unusable now. Please consult the manual for further assistance before rebooting your %s %s."
                ) % (getMachineBrand(), getMachineName(), getMachineBrand(),
                     getMachineName())
                if self.packages == 0:
                    error = _("No updates available. Please try again later.")
                if self.updating:
                    error = _(
                        "Update failed. Your %s %s does not have a working internet connection."
                    ) % (getMachineBrand(), getMachineName())
                self.status.setText(_("Error") + " - " + error)
        elif event == IpkgComponent.EVENT_LISTITEM:
            if 'enigma2-plugin-settings-' in param[
                    0] and self.channellist_only > 0:
                self.channellist_name = param[0]
                self.channellist_only = 2
        #print event, "-", param
        pass
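The feed-status check in the EVENT_DONE branch above widens the global default timeout to 3 seconds around a single urlopen() call. On Python 2.6+ the same effect is available per call via urllib2, without touching global state (a sketch; getImageVersion() and the config object are the ones used in the original):

import urllib2

feed_url = ("http://enigma2.world-of-satellite.com/feeds/"
            + getImageVersion() + "/status")
try:
    status = urllib2.urlopen(feed_url, timeout=3).read()
except Exception:
    status = '1'  # treat an unreachable feed as unstable, mirroring the original
config.softwareupdate.updateisunstable.setValue(status)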
Example #52
0
class RecursiveFetcher(object):
    LINK_FILTER = tuple(
        re.compile(i, re.IGNORECASE)
        for i in ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
    # ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
    #                       (
    #
    #                        )
    #                       )
    CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
    default_timeout = socket.getdefaulttimeout(
    )  # Needed here as it is used in __del__

    def __init__(self, options, log, image_map={}, css_map={}, job_info=None):
        bd = options.dir
        if not isinstance(bd, unicode):
            bd = bd.decode(filesystem_encoding)

        self.base_dir = os.path.abspath(os.path.expanduser(bd))
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)
        self.log = log
        self.verbose = options.verbose
        self.timeout = options.timeout
        self.encoding = options.encoding
        self.browser = options.browser if hasattr(options,
                                                  'browser') else browser()
        self.max_recursions = options.max_recursions
        self.match_regexps = [
            re.compile(i, re.IGNORECASE) for i in options.match_regexps
        ]
        self.filter_regexps = [
            re.compile(i, re.IGNORECASE) for i in options.filter_regexps
        ]
        self.max_files = options.max_files
        self.delay = options.delay
        self.last_fetch_at = 0.
        self.filemap = {}
        self.imagemap = image_map
        self.imagemap_lock = threading.RLock()
        self.stylemap = css_map
        self.image_url_processor = None
        self.stylemap_lock = threading.RLock()
        self.downloaded_paths = []
        self.current_dir = self.base_dir
        self.files = 0
        self.preprocess_regexps = getattr(options, 'preprocess_regexps', [])
        self.remove_tags = getattr(options, 'remove_tags', [])
        self.remove_tags_after = getattr(options, 'remove_tags_after', None)
        self.remove_tags_before = getattr(options, 'remove_tags_before', None)
        self.keep_only_tags = getattr(options, 'keep_only_tags', [])
        self.preprocess_html_ext = getattr(options, 'preprocess_html',
                                           lambda soup: soup)
        self.preprocess_raw_html = getattr(options, 'preprocess_raw_html',
                                           lambda raw, url: raw)
        self.prepreprocess_html_ext = getattr(options, 'skip_ad_pages',
                                              lambda soup: None)
        self.postprocess_html_ext = getattr(options, 'postprocess_html', None)
        self.preprocess_image_ext = getattr(options, 'preprocess_image', None)
        self._is_link_wanted = getattr(options, 'is_link_wanted',
                                       default_is_link_wanted)
        self.compress_news_images_max_size = getattr(
            options, 'compress_news_images_max_size', None)
        self.compress_news_images = getattr(options, 'compress_news_images',
                                            False)
        self.compress_news_images_auto_size = getattr(
            options, 'compress_news_images_auto_size', 16)
        self.scale_news_images = getattr(options, 'scale_news_images', None)
        self.download_stylesheets = not options.no_stylesheets
        self.show_progress = True
        self.failed_links = []
        self.job_info = job_info

    def get_soup(self, src, url=None):
        nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
        nmassage.extend(self.preprocess_regexps)
        # Some websites have buggy doctype declarations that mess up beautifulsoup
        nmassage += [(re.compile(r'<!DOCTYPE .+?>',
                                 re.DOTALL | re.IGNORECASE), lambda m: '')]
        # Remove comments as they can leave detritus when extracting tags leaves
        # multiple nested comments
        nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
        usrc = xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0]
        usrc = self.preprocess_raw_html(usrc, url)
        soup = BeautifulSoup(usrc, markupMassage=nmassage)

        replace = self.prepreprocess_html_ext(soup)
        if replace is not None:
            soup = BeautifulSoup(xml_to_unicode(replace,
                                                self.verbose,
                                                strip_encoding_pats=True)[0],
                                 markupMassage=nmassage)

        if self.keep_only_tags:
            body = Tag(soup, 'body')
            try:
                if isinstance(self.keep_only_tags, dict):
                    self.keep_only_tags = [self.keep_only_tags]
                for spec in self.keep_only_tags:
                    for tag in soup.find('body').findAll(**spec):
                        body.insert(len(body.contents), tag)
                soup.find('body').replaceWith(body)
            except AttributeError:  # soup has no body element
                pass

        def remove_beyond(tag, next):
            while tag is not None and getattr(tag, 'name', None) != 'body':
                after = getattr(tag, next)
                while after is not None:
                    ns = getattr(tag, next)
                    after.extract()
                    after = ns
                tag = tag.parent

        if self.remove_tags_after is not None:
            rt = [self.remove_tags_after] if isinstance(
                self.remove_tags_after, dict) else self.remove_tags_after
            for spec in rt:
                tag = soup.find(**spec)
                remove_beyond(tag, 'nextSibling')

        if self.remove_tags_before is not None:
            rt = [self.remove_tags_before] if isinstance(
                self.remove_tags_before, dict) else self.remove_tags_before
            for spec in rt:
                tag = soup.find(**spec)
                remove_beyond(tag, 'previousSibling')

        for kwds in self.remove_tags:
            for tag in soup.findAll(**kwds):
                tag.extract()
        return self.preprocess_html_ext(soup)

    def fetch_url(self, url):
        data = None
        self.log.debug('Fetching', url)
        st = time.time()

        # Check for a URL pointing to the local filesystem and special case it
        # for efficiency and robustness. Bypasses delay checking as it does not
        # apply to local fetches. Ensures that unicode paths that are not
        # representable in the filesystem_encoding work.
        is_local = 0
        if url.startswith('file://'):
            is_local = 7
        elif url.startswith('file:'):
            is_local = 5
        if is_local > 0:
            url = url[is_local:]
            if iswindows and url.startswith('/'):
                url = url[1:]
            with open(url, 'rb') as f:
                data = response(f.read())
                data.newurl = 'file:' + url  # This is what mechanize does for
                # local URLs
            self.log.debug('Fetched %s in %.1f seconds' %
                           (url, time.time() - st))
            return data

        delta = time.time() - self.last_fetch_at
        if delta < self.delay:
            time.sleep(self.delay - delta)
        if isinstance(url, unicode):
            url = url.encode('utf-8')
        # Not sure if this is really needed as I think mechanize
        # handles quoting automatically, but leaving it
        # in case it breaks something
        if re.search(r'\s+', url) is not None:
            purl = list(urlparse.urlparse(url))
            for i in range(2, 6):
                purl[i] = quote(purl[i])
            url = urlparse.urlunparse(purl)
        open_func = getattr(self.browser, 'open_novisit', self.browser.open)
        try:
            with closing(open_func(url, timeout=self.timeout)) as f:
                data = response(f.read() + f.read())
                data.newurl = f.geturl()
        except urllib2.URLError as err:
            if hasattr(err, 'code') and err.code in responses:
                raise FetchError(responses[err.code])
            if getattr(err, 'reason', [0])[0] == 104 or \
                getattr(getattr(err, 'args', [None])[0], 'errno', None) in (-2,
                        -3):  # Connection reset by peer or Name or service not known
                self.log.debug('Temporary error, retrying in 1 second')
                time.sleep(1)
                with closing(open_func(url, timeout=self.timeout)) as f:
                    data = response(f.read() + f.read())
                    data.newurl = f.geturl()
            else:
                raise err
        finally:
            self.last_fetch_at = time.time()
        self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
        return data

    def start_fetch(self, url):
        soup = BeautifulSoup(u'<a href="' + url + '" />')
        res = self.process_links(soup, url, 0, into_dir='')
        self.log.debug(url, 'saved to', res)
        return res

    def is_link_ok(self, url):
        for i in self.__class__.LINK_FILTER:
            if i.search(url):
                return False
        return True

    def is_link_wanted(self, url, tag):
        try:
            return self._is_link_wanted(url, tag)
        except NotImplementedError:
            pass
        except:
            return False
        if self.filter_regexps:
            for f in self.filter_regexps:
                if f.search(url):
                    return False
        if self.match_regexps:
            for m in self.match_regexps:
                if m.search(url):
                    return True
            return False
        return True

    def process_stylesheets(self, soup, baseurl):
        diskpath = unicode_path(os.path.join(self.current_dir, 'stylesheets'))
        if not os.path.exists(diskpath):
            os.mkdir(diskpath)
        for c, tag in enumerate(
                soup.findAll(lambda tag: tag.name.lower() in ['link', 'style']
                             and tag.has_key('type') and tag['type'].lower(
                             ) == 'text/css')):  # noqa
            if tag.has_key('href'):  # noqa
                iurl = tag['href']
                if not urlparse.urlsplit(iurl).scheme:
                    iurl = urlparse.urljoin(baseurl, iurl, False)
                with self.stylemap_lock:
                    if self.stylemap.has_key(iurl):  # noqa
                        tag['href'] = self.stylemap[iurl]
                        continue
                try:
                    data = self.fetch_url(iurl)
                except Exception:
                    self.log.exception('Could not fetch stylesheet ', iurl)
                    continue
                stylepath = os.path.join(diskpath, 'style' + str(c) + '.css')
                with self.stylemap_lock:
                    self.stylemap[iurl] = stylepath
                with open(stylepath, 'wb') as x:
                    x.write(data)
                tag['href'] = stylepath
            else:
                for ns in tag.findAll(text=True):
                    src = str(ns)
                    m = self.__class__.CSS_IMPORT_PATTERN.search(src)
                    if m:
                        iurl = m.group(1)
                        if not urlparse.urlsplit(iurl).scheme:
                            iurl = urlparse.urljoin(baseurl, iurl, False)
                        with self.stylemap_lock:
                            if self.stylemap.has_key(iurl):  # noqa
                                ns.replaceWith(
                                    src.replace(m.group(1),
                                                self.stylemap[iurl]))
                                continue
                        try:
                            data = self.fetch_url(iurl)
                        except Exception:
                            self.log.exception('Could not fetch stylesheet ',
                                               iurl)
                            continue
                        c += 1
                        stylepath = os.path.join(diskpath,
                                                 'style' + str(c) + '.css')
                        with self.stylemap_lock:
                            self.stylemap[iurl] = stylepath
                        with open(stylepath, 'wb') as x:
                            x.write(data)
                        ns.replaceWith(src.replace(m.group(1), stylepath))

    def rescale_image(self, data):
        return rescale_image(data, self.scale_news_images,
                             self.compress_news_images_max_size,
                             self.compress_news_images_auto_size)

    def process_images(self, soup, baseurl):
        diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
        if not os.path.exists(diskpath):
            os.mkdir(diskpath)
        c = 0
        for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.
                                has_key('src')):  # noqa
            iurl = tag['src']
            if iurl.startswith('data:image/'):
                try:
                    data = b64decode(iurl.partition(',')[-1])
                except:
                    self.log.exception('Failed to decode embedded image')
                    continue
            else:
                if callable(self.image_url_processor):
                    iurl = self.image_url_processor(baseurl, iurl)
                if not urlparse.urlsplit(iurl).scheme:
                    iurl = urlparse.urljoin(baseurl, iurl, False)
                with self.imagemap_lock:
                    if self.imagemap.has_key(iurl):  # noqa
                        tag['src'] = self.imagemap[iurl]
                        continue
                try:
                    data = self.fetch_url(iurl)
                    if data == 'GIF89a\x01':
                        # Skip empty GIF files as PIL errors on them anyway
                        continue
                except Exception:
                    self.log.exception('Could not fetch image ', iurl)
                    continue
            c += 1
            fname = ascii_filename('img' + str(c))
            if isinstance(fname, unicode):
                fname = fname.encode('ascii', 'replace')
            data = self.preprocess_image_ext(
                data, iurl) if self.preprocess_image_ext is not None else data
            if data is None:
                continue
            itype = what(None, data)
            if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
                # SVG image
                imgpath = os.path.join(diskpath, fname + '.svg')
                with self.imagemap_lock:
                    self.imagemap[iurl] = imgpath
                with open(imgpath, 'wb') as x:
                    x.write(data)
                tag['src'] = imgpath
            else:
                try:
                    # Ensure image is valid
                    img = image_from_data(data)
                    if itype not in {'png', 'jpg', 'jpeg'}:
                        itype = 'png' if itype == 'gif' else 'jpeg'
                        data = image_to_data(img, fmt=itype)
                    if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                        try:
                            data = self.rescale_image(data)
                        except Exception:
                            self.log.exception('failed to compress image ' +
                                               iurl)
                    # Moon+ apparently cannot handle .jpeg files
                    if itype == 'jpeg':
                        itype = 'jpg'
                    imgpath = os.path.join(diskpath, fname + '.' + itype)
                    with self.imagemap_lock:
                        self.imagemap[iurl] = imgpath
                    with open(imgpath, 'wb') as x:
                        x.write(data)
                    tag['src'] = imgpath
                except Exception:
                    traceback.print_exc()
                    continue

    def absurl(self, baseurl, tag, key, filter=True):
        iurl = tag[key]
        parts = urlparse.urlsplit(iurl)
        if not parts.netloc and not parts.path and not parts.query:
            return None
        if not parts.scheme:
            iurl = urlparse.urljoin(baseurl, iurl, False)
        if not self.is_link_ok(iurl):
            self.log.debug('Skipping invalid link:', iurl)
            return None
        if filter and not self.is_link_wanted(iurl, tag):
            self.log.debug('Filtered link: ' + iurl)
            return None
        return iurl

    def normurl(self, url):
        parts = list(urlparse.urlsplit(url))
        parts[4] = ''
        return urlparse.urlunsplit(parts)

    def localize_link(self, tag, key, path):
        parts = urlparse.urlsplit(tag[key])
        suffix = ('#' + parts.fragment) if parts.fragment else ''
        tag[key] = path + suffix

    def process_return_links(self, soup, baseurl):
        for tag in soup.findAll(lambda tag: tag.name.lower() == 'a' and tag.
                                has_key('href')):  # noqa
            iurl = self.absurl(baseurl, tag, 'href')
            if not iurl:
                continue
            nurl = self.normurl(iurl)
            if self.filemap.has_key(nurl):  # noqa
                self.localize_link(tag, 'href', self.filemap[nurl])

    def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
        res = ''
        diskpath = os.path.join(self.current_dir, into_dir)
        if not os.path.exists(diskpath):
            os.mkdir(diskpath)
        prev_dir = self.current_dir
        try:
            self.current_dir = diskpath
            tags = list(soup.findAll('a', href=True))

            for c, tag in enumerate(tags):
                if self.show_progress:
                    print '.',
                    sys.stdout.flush()
                sys.stdout.flush()
                iurl = self.absurl(baseurl,
                                   tag,
                                   'href',
                                   filter=recursion_level != 0)
                if not iurl:
                    continue
                nurl = self.normurl(iurl)
                if self.filemap.has_key(nurl):  # noqa
                    self.localize_link(tag, 'href', self.filemap[nurl])
                    continue
                if self.files > self.max_files:
                    return res
                linkdir = 'link' + str(c) if into_dir else ''
                linkdiskpath = os.path.join(diskpath, linkdir)
                if not os.path.exists(linkdiskpath):
                    os.mkdir(linkdiskpath)
                try:
                    self.current_dir = linkdiskpath
                    dsrc = self.fetch_url(iurl)
                    newbaseurl = dsrc.newurl
                    if len(dsrc) == 0 or \
                       len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
                        raise ValueError('No content at URL %r' % iurl)
                    if callable(self.encoding):
                        dsrc = self.encoding(dsrc)
                    elif self.encoding is not None:
                        dsrc = dsrc.decode(self.encoding, 'replace')
                    else:
                        dsrc = xml_to_unicode(dsrc, self.verbose)[0]

                    st = time.time()
                    soup = self.get_soup(dsrc, url=iurl)
                    self.log.debug('Parsed %s in %.1f seconds' %
                                   (iurl, time.time() - st))

                    base = soup.find('base', href=True)
                    if base is not None:
                        newbaseurl = base['href']
                    self.log.debug('Processing images...')
                    self.process_images(soup, newbaseurl)
                    if self.download_stylesheets:
                        self.process_stylesheets(soup, newbaseurl)

                    _fname = basename(iurl)
                    if not isinstance(_fname, unicode):
                        _fname.decode('latin1', 'replace')
                    _fname = _fname.encode('ascii', 'replace').replace(
                        '%', '').replace(os.sep, '')
                    _fname = ascii_filename(_fname)
                    _fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
                    res = os.path.join(linkdiskpath, _fname)
                    self.downloaded_paths.append(res)
                    self.filemap[nurl] = res
                    if recursion_level < self.max_recursions:
                        self.log.debug('Processing links...')
                        self.process_links(soup, newbaseurl,
                                           recursion_level + 1)
                    else:
                        self.process_return_links(soup, newbaseurl)
                        self.log.debug(
                            'Recursion limit reached. Skipping links in', iurl)

                    if newbaseurl and not newbaseurl.startswith('/'):
                        for atag in soup.findAll(
                                'a', href=lambda x: x and x.startswith('/')):
                            atag['href'] = urlparse.urljoin(
                                newbaseurl, atag['href'], True)
                    if callable(self.postprocess_html_ext):
                        soup = self.postprocess_html_ext(
                            soup, c == 0 and recursion_level == 0
                            and not getattr(self, 'called_first', False),
                            self.job_info)

                        if c == 0 and recursion_level == 0:
                            self.called_first = True

                    save_soup(soup, res)
                    self.localize_link(tag, 'href', res)
                except Exception as err:
                    if isinstance(err, AbortArticle):
                        raise
                    self.failed_links.append((iurl, traceback.format_exc()))
                    self.log.exception('Could not fetch link', iurl)
                finally:
                    self.current_dir = diskpath
                    self.files += 1
        finally:
            self.current_dir = prev_dir
        if self.show_progress:
            print
        return res
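The class attribute default_timeout above is evaluated once, when the class body runs, and the original comment notes it is needed in __del__. A minimal sketch of that capture-and-restore pattern (a sketch, not the implementation above):

import socket

class Fetcher(object):
    # Captured once when the class body executes; __del__ restores this
    # original default even if an instance changed it during its lifetime.
    default_timeout = socket.getdefaulttimeout()

    def __init__(self, timeout):
        socket.setdefaulttimeout(timeout)

    def __del__(self):
        socket.setdefaulttimeout(self.default_timeout)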
Example #53
0
        lines = html.split('\n')
        for i in [10, 50, 120]:
            charset = chardet.detect('\n'.join(lines[:i]))['encoding']
            if charset and charset.lower() != 'ascii':
                break

    if charset == None:
        charset = ''
    return charset.lower()

if __name__ == '__main__':
    import urllib2
    import sys
    import socket
    # If the default timeout is already managed centrally by the upper A3
    # layer, we should not set our own default timeout here.
    socket.getdefaulttimeout() or socket.setdefaulttimeout(5)

    try:
        url = sys.argv[1]
    except:
        print 'Usage: python charsetck.py http://www.knownsec.com/'
        sys.exit(0)

    req = urllib2.Request(url)
    req.add_header('User-Agent','Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    usock = urllib2.urlopen(req)
    headers = usock.headers.dict
    html = usock.read()
    usock.close()

    print check(headers,html)
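The line socket.getdefaulttimeout() or socket.setdefaulttimeout(5) above only installs a 5-second default when no default has been configured by the embedding application; an equivalent, more explicit sketch:

import socket

# Respect a default timeout configured by the caller (the upper layer the
# comment refers to); otherwise fall back to 5 seconds for this script.
if socket.getdefaulttimeout() is None:
    socket.setdefaulttimeout(5)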
Example #54
0
def read_body_and_headers(url,post=None,headers=[],follow_redirects=False,timeout=None):
    _log("read_body_and_headers "+url)
    if post is not None: _log("read_body_and_headers post="+post)
    if len(headers)==0: headers.append(["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"])
    # Start cookie lib
    ficherocookies=os.path.join(get_data_path(),'cookies.dat'); _log("read_body_and_headers cookies_file="+ficherocookies); cj=None; ClientCookie=None; cookielib=None
    try: _log("read_body_and_headers importing cookielib"); import cookielib # Let's see if cookielib is available
    except ImportError:
        _log("read_body_and_headers cookielib no disponible") # If importing cookielib fails # let's try ClientCookie
        try: _log("read_body_and_headers importing ClientCookie"); import ClientCookie
        except ImportError: _log("read_body_and_headers ClientCookie not available"); urlopen=urllib2.urlopen; Request=urllib2.Request # ClientCookie isn't available either
        else: _log("read_body_and_headers ClientCookie available"); urlopen=ClientCookie.urlopen; Request=ClientCookie.Request; cj=ClientCookie.MozillaCookieJar() # imported ClientCookie
    else:
        _log("read_body_and_headers cookielib available"); urlopen=urllib2.urlopen; Request=urllib2.Request; cj=cookielib.MozillaCookieJar() # importing cookielib worked
        # This is a subclass of FileCookieJar # that has useful load and save methods
    if cj is not None: # we successfully imported # one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")
        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            try: cj.load(ficherocookies) # if we have a cookie file already saved # then load the cookies into the Cookie Jar
            except: _log("read_body_and_headers Wrong cookie file, deleting..."); os.remove(ficherocookies)
        # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2
            if not follow_redirects: opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler())
            else: opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie
            opener=ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)); ClientCookie.install_opener(opener)
    # -------------------------------------------------
    # Cookies installed, send the request
    # -------------------------------------------------
    inicio=time.clock() # Timer
    txheaders={} # Dictionary for the request headers
    if post is None: _log("read_body_and_headers GET request") # Build the request
    else: _log("read_body_and_headers POST request")
    _log("read_body_and_headers ---------------------------") # Añade las cabeceras
    for header in headers: _log("read_body_and_headers header %s=%s" % (str(header[0]),str(header[1]))); txheaders[header[0]]=header[1]
    _log("read_body_and_headers ---------------------------"); req=Request(url,post,txheaders)
    if timeout is None: handle=urlopen(req)
    else:        
        # Available from Python 2.6 onwards --> handle = urlopen(req, timeout=timeout). For all versions:
        try: import socket; deftimeout=socket.getdefaulttimeout(); socket.setdefaulttimeout(timeout); handle=urlopen(req); socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info(): _log( "%s" % line )
    cj.save(ficherocookies) # Update the cookie store
    # Read the data and close
    if handle.info().get('Content-Encoding')=='gzip': buf=StringIO(handle.read()); f=gzip.GzipFile(fileobj=buf); data=f.read()
    else: data=handle.read()
    info=handle.info(); _log("read_body_and_headers Response"); returnheaders=[]; _log("read_body_and_headers ---------------------------")
    for header in info: _log("read_body_and_headers "+header+"="+info[header]); returnheaders.append([header,info[header]])
    handle.close(); _log("read_body_and_headers ---------------------------")
    '''
    # Send the request
    try: response = urllib2.urlopen(req)
    # If it fails, retry substituting special characters
    except:
        req = urllib2.Request(url.replace(" ","%20"))
        # Add the headers
        for header in headers: req.add_header(header[0],header[1])
        response = urllib2.urlopen(req)
    '''
    # Elapsed time
    fin=time.clock(); _log("read_body_and_headers Downloaded in %d seconds " % (fin-inicio+1)); _log("read_body_and_headers body="+data); return data,returnheaders
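In the pre-2.6 fallback above, the previous default timeout is not restored if urlopen() raises, because the restore sits on the same line inside try and the except block only logs. A sketch of the same fallback with the restore guaranteed by finally:

import socket

def open_with_timeout(urlopen, req, timeout):
    # Fallback for interpreters whose urlopen() lacks a timeout parameter.
    previous = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urlopen(req)
    finally:
        socket.setdefaulttimeout(previous)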
Example #55
0
 def __init__(self, url = scalaris.DEFAULT_URL, timeout = socket.getdefaulttimeout()):
     # Pass the caller-supplied url through instead of always using DEFAULT_URL.
     scalaris.JSONConnection.__init__(self, url = url, timeout = timeout)
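Note that the timeout default in the signature above is evaluated exactly once, when the def statement runs, so later changes to the process-wide default are not picked up. A small sketch contrasting definition-time and call-time binding:

import socket

def default_at_definition(timeout=socket.getdefaulttimeout()):
    return timeout  # frozen when the function is defined

def default_at_call(timeout=None):
    if timeout is None:
        timeout = socket.getdefaulttimeout()  # read on every call
    return timeout

socket.setdefaulttimeout(7)
print(default_at_definition())  # whatever the default was at definition time
print(default_at_call())        # 7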
Example #56
0
    def _make_request(self,
                      conn,
                      method,
                      url,
                      timeout=_Default,
                      **httplib_request_kw):
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param timeout:
            Socket timeout in seconds for the request. This can be a
            float or integer, which will set the same timeout value for
            the socket connect and the socket read, or an instance of
            :class:`urllib3.util.Timeout`, which gives you more fine-grained
            control over your timeouts.
        """
        self.num_requests += 1

        timeout_obj = self._get_timeout(timeout)

        timeout_obj.start_connect()
        conn.timeout = timeout_obj.connect_timeout
        # conn.request() calls httplib.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        conn.request(method, url, **httplib_request_kw)

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        # App Engine doesn't have a sock attr
        if hasattr(conn, 'sock'):
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url,
                    "Read timed out. (read timeout=%s)" % read_timeout)
            if read_timeout is Timeout.DEFAULT_TIMEOUT:
                conn.sock.settimeout(socket.getdefaulttimeout())
            else:  # None or a value
                conn.sock.settimeout(read_timeout)

        # Receive the response from the server
        try:
            try:  # Python 2.7+, use buffering of HTTP responses
                httplib_response = conn.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                httplib_response = conn.getresponse()
        except SocketTimeout:
            raise ReadTimeoutError(
                self, url, "Read timed out. (read timeout=%s)" % read_timeout)

        except BaseSSLError as e:
            # Catch possible read timeouts thrown as SSL errors. If not the
            # case, rethrow the original. We need to do this because of:
            # http://bugs.python.org/issue10272
            if 'timed out' in str(e) or \
               'did not complete (read)' in str(e):  # Python 2.6
                raise ReadTimeoutError(self, url, "Read timed out.")

            raise

        except SocketError as e:  # Platform-specific: Python 2
            # See the above comment about EAGAIN in Python 3. In Python 2 we
            # have to specifically catch it and throw the timeout error
            if e.errno in _blocking_errnos:
                raise ReadTimeoutError(
                    self, url,
                    "Read timed out. (read timeout=%s)" % read_timeout)

            raise

        # AppEngine doesn't have a version attr.
        http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
        log.debug("\"%s %s %s\" %s %s" %
                  (method, url, http_version, httplib_response.status,
                   httplib_response.length))
        return httplib_response
Example #57
0
 def connect(self):
     old_timeout = socket.getdefaulttimeout()
     self.sock = socket.create_connection((self.host, self.port),
                                          timeout=CLIENT_CONNECT_TIMEOUT_S)
     self.sock.settimeout(old_timeout)
Example #58
0
    def _node_refresh_businfo(self, node, api, bad_node=False):
        """
        Retrieve bus info from the node and update nodes and edges as appropriate
        @param node: node name
        @type  node: str
        @param api: XML-RPC proxy
        @type  api: ServerProxy
        @param bad_node: If True, node has connectivity issues and
        should be treated differently
        @type  bad_node: bool
        """
        try:
            logger.debug("businfo: contacting node [%s] for bus info", node)
            
            # unmark bad node, though it stays on the bad list
            if bad_node:
                self._unmark_bad_node(node)            
            # Lower the socket timeout as we cannot abide by slow HTTP timeouts.
            # If a node cannot meet this timeout, it goes on the bad list
            # TODO: override transport instead.
            old_timeout = socket.getdefaulttimeout()
            if bad_node:
                #even stricter timeout for bad_nodes right now
                socket.setdefaulttimeout(0.2)
            else:
                socket.setdefaulttimeout(1.0)
                
            code, msg, bus_info = api.getBusInfo(_ROS_NAME)
            
            socket.setdefaulttimeout(old_timeout)
        except Exception as e:
            # node is (still) bad
            self._mark_bad_node(node, str(e))
            code = -1
            msg = traceback.format_exc()

        updated = False
        if code != 1:
            logger.error("cannot get stats info from node [%s]: %s", node, msg)
        else:
            # [[connectionId1, destinationId1, direction1, transport1, ...]... ]
            for info in bus_info:
                # #3579 bad node, ignore
                if len(info) < 5:
                    continue
                
                connection_id = info[0]
                dest_id       = info[1]
                direction     = info[2]
                transport     = info[3]
                topic         = info[4]
                if len(info) > 5:
                    connected = info[5]
                else:
                    connected = True #backwards compatibility

                if connected and topic.startswith(self.topic_ns):
                    # blindly add as we will be able to catch state change via edges.
                    # this currently means we don't cleanup topics
                    self.nt_nodes.add(topic_node(topic))

                    # update node->topic->node graph edges
                    updated = self.nt_edges.add_edges(node, topic_node(topic), direction) or updated

                    # update node->node graph edges
                    if dest_id.startswith('http://'):
                        #print("FOUND URI", dest_id)
                        dest_name = self.uri_node_map.get(dest_id, None)
                        updated = self.nn_edges.add_edges(node, dest_name, direction, topic) or updated
                else:
                    # TODO: anything to do here?
                    pass
        return updated
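Note that in the example above the restoring call to socket.setdefaulttimeout(old_timeout) sits inside the try block, so a failing getBusInfo() call leaves the lowered default timeout in effect for the whole process. Below is a minimal sketch of the same save/lower/restore step with try/finally; the helper name and the caller_id parameter are hypothetical, and only the getBusInfo() call mirrors the example:

    import socket

    def get_bus_info_with_lowered_timeout(api, caller_id, bad_node=False):
        # Hypothetical helper, not part of the example above.
        old_timeout = socket.getdefaulttimeout()
        # Stricter timeout for nodes already known to be slow.
        socket.setdefaulttimeout(0.2 if bad_node else 1.0)
        try:
            return api.getBusInfo(caller_id)
        finally:
            # Restore the process-wide default even if the call raised.
            socket.setdefaulttimeout(old_timeout)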
Example #59
0
    def _make_request(
        self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw
    ):
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param timeout:
            Socket timeout in seconds for the request. This can be a
            float or integer, which will set the same timeout value for
            the socket connect and the socket read, or an instance of
            :class:`urllib3.util.Timeout`, which gives you more fine-grained
            control over your timeouts.
        """
        self.num_requests += 1

        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = timeout_obj.connect_timeout

        # Trigger any extra validation we need to do.
        try:
            self._validate_conn(conn)
        except (SocketTimeout, BaseSSLError) as e:
            # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
            self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
            raise

        # conn.request() calls http.client.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        try:
            if chunked:
                conn.request_chunked(method, url, **httplib_request_kw)
            else:
                conn.request(method, url, **httplib_request_kw)

        # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
        # legitimately able to close the connection after sending a valid response.
        # With this behaviour, the received response is still readable.
        except BrokenPipeError:
            # Python 3
            pass
        except IOError as e:
            # Python 2 and macOS/Linux
            # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS
            # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
            if e.errno not in {
                errno.EPIPE,
                errno.ESHUTDOWN,
                errno.EPROTOTYPE,
            }:
                raise

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        # App Engine doesn't have a sock attr
        if getattr(conn, "sock", None):
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url, "Read timed out. (read timeout=%s)" % read_timeout
                )
            if read_timeout is Timeout.DEFAULT_TIMEOUT:
                conn.sock.settimeout(socket.getdefaulttimeout())
            else:  # None or a value
                conn.sock.settimeout(read_timeout)

        # Receive the response from the server
        try:
            try:
                # Python 2.7, use buffering of HTTP responses
                httplib_response = conn.getresponse(buffering=True)
            except TypeError:
                # Python 3
                try:
                    httplib_response = conn.getresponse()
                except BaseException as e:
                    # Remove the TypeError from the exception chain in
                    # Python 3 (including for exceptions like SystemExit).
                    # Otherwise it looks like a bug in the code.
                    six.raise_from(e, None)
        except (SocketTimeout, BaseSSLError, SocketError) as e:
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # AppEngine doesn't have a version attr.
        http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
        log.debug(
            '%s://%s:%s "%s %s %s" %s %s',
            self.scheme,
            self.host,
            self.port,
            method,
            url,
            http_version,
            httplib_response.status,
            httplib_response.length,
        )

        try:
            assert_header_parsing(httplib_response.msg)
        except (HeaderParsingError, TypeError) as hpe:  # Platform-specific: Python 3
            log.warning(
                "Failed to parse headers (url=%s): %s",
                self._absolute_url(url),
                hpe,
                exc_info=True,
            )

        return httplib_response
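The docstring above notes that timeout may be a plain number or a urllib3.util.Timeout with separate connect and read values; when the read timeout is left at Timeout.DEFAULT_TIMEOUT, _make_request falls back to socket.getdefaulttimeout() for the recv(). A small usage sketch follows, assuming a urllib3 release with this behaviour; the 5-second default and the example URL are arbitrary:

    import socket

    import urllib3
    from urllib3.util import Timeout

    # Connects use an explicit 2 s limit; reads fall back to the process-wide
    # default (5 s here) because the read timeout stays at DEFAULT_TIMEOUT.
    socket.setdefaulttimeout(5.0)
    http = urllib3.PoolManager(timeout=Timeout(connect=2.0))
    response = http.request("GET", "http://example.com/")
    print(response.status)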
Example #60
0
  def Send(self, request_path, payload="",
           content_type="application/octet-stream",
           timeout=None,
           **kwargs):
    """Sends an RPC and returns the response.

    Args:
      request_path: The path to send the request to, eg /api/appversion/create.
      payload: The body of the request, or None to send an empty request.
      content_type: The Content-Type header to use.
      timeout: timeout in seconds; default None i.e. no timeout.
        (Note: for large requests on OS X, the timeout doesn't work right.)
      kwargs: Any keyword arguments are converted into query string parameters.

    Returns:
      The response body, as a string.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
      tries = 0
      auth_tried = False
      while True:
        tries += 1
        url = "%s://%s%s" % (self.scheme, self.host, request_path)
        if kwargs:
          url += "?" + six.moves.urllib.parse.urlencode(sorted(kwargs.items()))
        req = self._CreateRequest(url=url, data=payload)
        req.add_header("Content-Type", content_type)



        req.add_header("X-appcfg-api-version", "1")

        try:
          logger.debug('Sending %s request:\n%s',
                       self.scheme.upper(),
                       HttpRequestToString(req, include_data=self.debug_data))
          f = self.opener.open(req)
          response = f.read()
          f.close()

          return response
        except six.moves.urllib.error.HTTPError as e:
          logger.debug("Got http error, this is try #%s", tries)


          if tries > self.rpc_tries:
            raise
          elif e.code == 401:
            if auth_tried:
              raise
            auth_tried = True
            self._Authenticate()
          elif e.code >= 500 and e.code < 600:
            continue
          elif e.code == 302:
            if auth_tried:
              raise
            auth_tried = True
            loc = e.info()["location"]
            logger.debug("Got 302 redirect. Location: %s", loc)
            if loc.startswith("https://www.google.com/accounts/ServiceLogin"):
              self._Authenticate()
            elif re.match(
                r"https://www\.google\.com/a/[a-z0-9\.\-]+/ServiceLogin", loc):
              self.account_type = os.getenv("APPENGINE_RPC_HOSTED_LOGIN_TYPE",
                                            "HOSTED")
              self._Authenticate()
            elif loc.startswith("http://%s/_ah/login" % (self.host,)):
              self._DevAppServerAuthenticate()
            else:
              raise
          else:
            raise
    finally:
      socket.setdefaulttimeout(old_timeout)
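The Send() method above swaps in a new process-wide default timeout and restores it in a finally block. When the same pattern is needed in several places it can be packaged as a context manager; here is a minimal sketch, with a hypothetical name that does not appear in any of the examples:

    import socket
    from contextlib import contextmanager

    @contextmanager
    def default_socket_timeout(timeout):
        # Hypothetical helper: temporarily override the process-wide default
        # socket timeout. This affects every socket created without an
        # explicit timeout, in all threads, for the duration of the block.
        old_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(timeout)
        try:
            yield
        finally:
            # Always restore the previous default, even on error.
            socket.setdefaulttimeout(old_timeout)

    # Roughly what Send() does around its retry loop:
    # with default_socket_timeout(30):
    #     response = opener.open(req).read()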