def read_url(self, url):
    print('URL: ' + url)
    request = urllib.request.urlopen(url)
    if request.getcode() == 200:
        return request.read()

def url_req(url):
    data = None  # initialize so a non-200 response does not raise NameError
    request = urllib.request.urlopen(url)
    if request.getcode() == 200:
        data = json.loads(request.read())
    else:
        print("Error receiving data", request.getcode())
    return data

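# A usage sketch for url_req (the URL is illustrative, and the json and
# urllib.request imports are assumed at module level, as these snippets
# imply). Callers get None on a non-200 response and should check for it
# before indexing into the result.
payload = url_req("https://api.example.com/status.json")
if payload is not None:
    print(payload.get("status"))
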
def get(self, data=None):
    logger.debug("GET %s", self.url)
    req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    try:
        if self.opener:
            request = self.opener.open(req, timeout=self.timeout)
        else:
            request = urllib.request.urlopen(req, timeout=self.timeout)
    except (urllib.error.HTTPError, CertificateError) as error:
        # CertificateError has no .code attribute, so use getattr here
        if getattr(error, "code", None) == 401:
            raise UnauthorizedAccess("Access to %s denied" % self.url)
        raise HTTPError("Request to %s failed: %s" % (self.url, error))
    except (socket.timeout, urllib.error.URLError) as error:
        raise HTTPError("Unable to connect to server %s: %s" % (self.url, error))
    if request.getcode() > 200:
        logger.debug("Server responded with status code %s", request.getcode())
    try:
        self.total_size = int(request.info().get("Content-Length").strip())
    except AttributeError:
        logger.warning("Failed to read response's content length")
        self.total_size = 0
    self.response_headers = request.getheaders()
    self.status_code = request.getcode()
    if self.status_code > 299:
        logger.warning("Request responded with code %s", self.status_code)
    self.content = b"".join(self._iter_chunks(request))
    self.info = request.info()
    request.close()
    return self

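# The get() variant above calls self._iter_chunks(request), which is not
# shown in this section. A minimal sketch of what it plausibly looks like,
# modeled on the inline chunk loop in the variant below (buffer_size is
# assumed to be an attribute, as in that variant); this is an assumption,
# not the original helper.
def _iter_chunks(self, request):
    """Yield successive buffer_size chunks until the response is exhausted."""
    while True:
        chunk = request.read(self.buffer_size)
        if not chunk:
            return
        yield chunk
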
def get(self, data=None):
    logger.debug("GET %s", self.url)
    req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    try:
        if self.opener:
            request = self.opener.open(req, timeout=self.timeout)
        else:
            request = urllib.request.urlopen(req, timeout=self.timeout)
    except (urllib.error.HTTPError, CertificateError) as error:
        raise HTTPError("Unavailable url %s: %s" % (self.url, error))
    except (socket.timeout, urllib.error.URLError) as error:
        raise HTTPError("Unable to connect to server %s: %s" % (self.url, error))
    if request.getcode() > 200:
        logger.debug("Server responded with status code %s", request.getcode())
    try:
        total_size = int(request.info().get("Content-Length").strip())
    except AttributeError:
        logger.warning("Failed to read response's content length")
        total_size = 0
    self.response_headers = request.getheaders()
    self.status_code = request.getcode()
    if self.status_code > 299:
        logger.warning("Request responded with code %s", self.status_code)
    chunks = []
    while True:
        if self.stop_request and self.stop_request.is_set():
            self.content = b""  # keep content as bytes on all paths
            return self
        try:
            chunk = request.read(self.buffer_size)
        except socket.timeout:
            logger.error("Request timed out")
            self.content = b""
            return self
        self.downloaded_size += len(chunk)
        if self.thread_queue:
            self.thread_queue.put((chunk, self.downloaded_size, total_size))
        else:
            chunks.append(chunk)
        if not chunk:
            break
    request.close()
    self.content = b"".join(chunks)
    self.info = request.info()
    return self

def get_page(self, url):
    self.num += 1
    try:
        request = urllib.request.urlopen(url)
    except urllib.error.URLError:
        return False
    print("Crawling page " + str(self.num) + ", url[" + url + "]...")
    if request.getcode() != 200:
        print("Crawl failed, error code: [" + str(request.getcode()) + "]")
        return False
    else:
        self.success_num += 1
        print("Crawl succeeded, parsing page content...")
        return request.read().decode("utf-8")

def downfile(url, path):
    """Download a file."""
    request = urllib.request.urlopen(url, data=None, timeout=60)
    if request.getcode() == 200:
        data = request.read()
        with open(path, 'wb') as filestream:
            filestream.write(data)

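# Minimal usage sketch for downfile; the URL and destination path are made
# up for illustration.
downfile("https://example.com/logo.png", "/tmp/logo.png")
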
def Downfile(self, url, dir, name="", isSave=False):
    """Download a file, returning cached bytes when it already exists on disk."""
    if not os.path.exists(dir):
        os.mkdir(dir)
    if name != "":
        filePath = os.path.join(dir, name)
        if os.path.exists(filePath):
            with open(filePath, "rb") as filestream:
                return filestream.read()
        else:
            print(url)
    headers = {
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    }
    requestContent = urllib.request.Request(url=url, headers=headers)
    request = urllib.request.urlopen(requestContent, data=None, timeout=60)
    if request.getcode() == 200:
        data = request.read()
        if isSave and name != "":  # saving requires a name; filePath is unset otherwise
            with open(filePath, 'wb') as filestream:
                # print(filePath)
                filestream.write(data)
        return data

def download(self, url):
    if url is not None:
        try:
            # Python 3: add a timeout check
            request = urllib.request.urlopen(url, timeout=10)
            if request.getcode() == 200:
                # return request.read().decode("utf-8")
                return request.read()
            else:
                return None
        except Exception as err:
            print(str(err))
    else:
        return None

# The requests module produced garbled (mis-decoded) output with this approach:
# import requests
#
# class HtmlDownloader(object):
#
#     def download(self, url):
#         if url is None:
#             return None
#
#         response = requests.get(url)
#
#         if response.status_code != 200:
#             return None  # request failed
#         else:
#             return response.text  # return the downloaded content

async def download_coroutine(url):
    # Note: urllib.request is blocking, so despite the async signature this
    # coroutine does not actually yield while downloading.
    failed = False
    try:
        request = urllib.request.urlopen(url)
        if request.getcode() == 200:
            print('Web site exists')
        else:
            failed = True
            print("Website returned response code: {code}".format(code=request.getcode()))
    except urllib.error.URLError:
        print('Web site does not exist')
        failed = True
    if failed:
        return 'ERROR'
    filename = os.path.basename(url)
    with open(filename, 'wb') as file_handle:
        while True:
            chunk = request.read(1024)
            if not chunk:
                break
            file_handle.write(chunk)
    print('Finished downloading {filename}'.format(filename=filename))
    return filename

def extractRealSupportedURI(uri):
    """
    Returns "real" URI if it survives redirects and returns a 200.
    Returns None otherwise.
    """
    realURI = None
    try:
        # this function follows the URI, resolving all redirects,
        # and detects redirect loops
        # iri2uri is needed for IRIs
        request = urllib.request.urlopen(httplib2.iri2uri(uri))
        if request.getcode() == 200:
            realURI = request.geturl()
    except (urllib.error.HTTPError, urllib.error.URLError):
        # something went wrong, we don't care what
        realURI = None
    except UnicodeError:
        # something went very wrong with the IRI decoding
        realURI = None
    return realURI

def get_city():
    request = urllib.request.urlopen("http://ip-api.com/json/" + get_ip())
    if request.getcode() == 200:
        data = json.loads(request.read())
        return data["city"]
    else:
        print(f"E: {request.getcode()}")

def get_ip():
    request = urllib.request.urlopen("https://api.ipify.org?format=json")
    if request.getcode() == 200:
        data = json.loads(request.read())
        return data["ip"]
    else:
        print(f"E: {request.getcode()}")

def test_templates_loaded(self):
    with open('./al_visitor_site/resources/template_urls.txt', 'r') as fp:
        urls = [line.strip() for line in fp.readlines()]
    count = 0
    fails = 0
    for prod_url in urls:
        test_url = prod_url.replace('https://www.angieslist.com', self.visitor_site_url)
        try:
            request = urllib.request.urlopen(test_url)
            count += 1
            code = request.getcode()
            print("URL %s tested. code %s from: %s" % (count, code, test_url))
            self.assertTrue(200 <= code < 400)
        except urllib.error.HTTPError as e:
            fails += 1
            print("FAIL: Received http code %s from %s" % (e.code, test_url))
        except urllib.error.URLError:
            fails += 1
            print("FAIL: URL error - %s" % test_url)
        except ConnectionRefusedError:
            fails += 1
            print("FAIL: Connection refused - %s" % test_url)
    print("-------------------\n\n%s URL(s) tested\n-------------------" % count)

def get_stats(self):
    request = urllib.request.urlopen(self.battlemetrics_url)
    if request.getcode() == 200:
        response = request.read()
        self.server_stats = json.loads(response)
        return self.server_stats
    else:
        exit(1)

def get_statuscode(url):
    """get statuscode of a url"""
    try:
        request = urllib.request.urlopen(url)
    except urllib.error.HTTPError as e:
        return e.code
    else:
        return request.getcode()

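# A quick check of get_statuscode. urllib.request.urlopen raises HTTPError
# for 4xx/5xx responses, so the except branch is what actually reports those
# codes; the httpbin.org endpoints are just illustrative.
print(get_statuscode("https://httpbin.org/status/200"))  # -> 200
print(get_statuscode("https://httpbin.org/status/404"))  # -> 404 (via e.code)
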
async def test_connection(model, app):
    # Ignore self signed SSL cert directly on unit
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    unifi_unit = app.units[0]
    address = f"https://{unifi_unit.public_address}.xip.io:8443"
    print(f"Checking address: {address}")
    request = urllib.request.urlopen(address, context=ctx)
    assert request.getcode() == 200

def __is_public_repository(self, url):
    request = urllib.request.urlopen(url)
    request_url = request.geturl()
    if url == request_url or url.rsplit('.', 1)[0] == request_url:
        try:
            if request.getcode() == 200:
                return True
        except URLError:
            pass
    return False

def get_album_art_url(self, album_id):
    # Sadly we cannot determine if the Beets library really contains album
    # art. Thus we need to ask for it and check the status code.
    url = "{0}/album/{1}/art".format(self.api_endpoint, album_id)
    try:
        request = urllib.request.urlopen(url)
    except IOError:
        # DNS problem or similar
        return None
    request.close()
    return url if request.getcode() == 200 else None

def get_location_description(lat, lon):
    url = ("https://nominatim.openstreetmap.org/search.php?q=" + str(lat) +
           "%2C" + str(lon) + "&polygon_geojson=1&format=jsonv2")
    request = urllib.request.urlopen(url)
    if request.getcode() != 200:
        return ""
    data = json.loads(request.read())
    if len(data) == 0:
        return ""
    if 'display_name' in data[0]:
        return data[0]['display_name']
    return ""

def read_metadata(self, uri):
    metadata = {}
    request = urllib.request.urlopen(uri)
    if request.getcode() == 200:
        try:
            metadata = json.loads(request.read())
        except (TypeError, json.JSONDecodeError):
            pass
    return metadata

def test_wait_for_request(self):
    """Method used to test if the callback server is running and
    responding as expected."""
    server_thread = threading.Thread(
        target=callback_server.CallbackServer.wait_for_request)
    server_thread.start()
    request = urllib.request.urlopen(properties.SPOTIFY_REDIRECT_URL)
    http_code = request.getcode()
    response_body = request.read()
    self.assertEqual(http_code, 200)
    self.assertEqual(response_body, properties.CALLBACK_RESPONSE_BODY)

def get_ron_swanson_quote() -> Quote:
    """
    Get a quote from Ron Swanson
    :return: Quote, can be empty
    """
    url = 'https://ron-swanson-quotes.herokuapp.com/v2/quotes'
    with urllib.request.urlopen(url) as request:
        # urlopen raises HTTPError for non-2xx responses, so this check is
        # a defensive fallback
        if request.getcode() != 200:
            logging.error(
                f'Failed to reach the quote API, error code = {request.getcode()}')
            return Quote('', '')
        data = json.loads(request.read())
        return Quote(data[0], 'Ron Swanson')

def get_city(default_city="London"):
    try:
        request = urllib.request.urlopen("https://ipapi.co/json")
        if request.getcode() == 200:
            try:
                data = json.loads(request.read())
                return return_city(data["city"])
            except json.JSONDecodeError:
                print("E: Couldn't load Json data.")
        else:
            print(f"E: {request.getcode()}")
    except Exception:  # network failure: fall back to the default city
        return return_city(default_city)

def raw_stripes(self, dataset, columns, rgid, use_cache=None, compress=False):
    if use_cache is None:
        use_cache = self.UseDataCache
    out_data = {}
    column_list = ",".join(columns)
    url = "./stripes?ds=%s&columns=%s&rgid=%s&compressed=%s" % (
        dataset, column_list, rgid, "yes" if compress else "no")
    # print("url:", url)
    request = self.requestWithRetries(url, bypass_cache=not use_cache, timeout=120)
    data = request.read()
    # print("data: %s" % (repr(data[:100]),))
    header_end = data.index(b"\n")
    header = data[:header_end]
    i = header_end + 1
    # print("header: [%s]" % (header,))
    for w in header.split():
        try:
            cn, length = w.split(b":")
            length = int(length)
            cn = to_str(cn)
        except ValueError:
            sys.stderr.write("Error parsing header [%s] url:%s status:%s" %
                             (header, url, request.getcode()))
            sys.stderr.write("request status=%s" % (request.getcode(),))
            sys.stderr.write(traceback.format_exc() + "\n")
            sys.exit(1)
        segment = data[i:i + length]
        if compress:
            segment = zlib.decompress(segment)
        out_data[cn] = segment
        i += length
    return out_data

def is_site_available(self, site="", path=""):
    '''This function retrieves the status code of a web site by requesting
    HEAD data from the host. This means that it only requests the headers.
    If the host cannot be reached or something else goes wrong, it returns
    False. This will only work if the self.set_no_proxy method is used
    before this method is called.

    :param site: string; fqdn (domain); ex: http://www.google.com/ (Default value = "")
    :param path: string; the rest of the URL; ex: docs/about (Default value = "")
    :returns: retval
    :rtype: bool
    @author: ???
    @change: 02/12/2018 - Breen Malmberg - added doc string decorators;
        proxy will now be set for the test if the use_proxy argument in
        __init__ is True.
    '''

    retval = True

    try:
        if self.use_proxy:
            self.set_proxy()
        page = site + path
        req = urllib.request.Request(page, headers={'User-Agent': "Magic Browser"})
        req.add_header('User-agent', 'Firefox/31.5.0')
        request = urllib.request.urlopen(req, timeout=3)
        retcode = request.getcode()
        # get the first digit of the return code;
        # if it is not in the 200 range, then an error has occurred
        # (all http successful response codes are in the 2xx range)
        idd = int(str(retcode)[:1])
        if idd != 2:
            retval = False  # the docstring promises False on failure
            self.logger.log(
                LogPriority.DEBUG,
                "Failed to reach specified page: " + str(page) +
                " with HTTP error code: " + str(retcode))
        if retval:
            self.logger.log(LogPriority.DEBUG, "Site is available.")
    except Exception:
        raise
    return retval

def openweather(city_name, units, api_key):
    try:
        request = urllib.request.urlopen(
            f"{OPENWEATHER_URL}?q={city_name.replace(' ', '+')}&units={units}&appid={api_key}"
        )
        if request.getcode() == 200:
            data = json.loads(request.read())
            _id = data["id"]
            name = data["name"]
            timezone = data["timezone"]
            country = data["sys"]["country"]
            sunrise = data["sys"]["sunrise"]
            sunset = data["sys"]["sunset"]
            temp = data["main"]["temp"]
            temp_min = data["main"]["temp_min"]
            temp_max = data["main"]["temp_max"]
            humidity = data["main"]["humidity"]
            feels_like = data["main"]["feels_like"]
            description = data["weather"][0]["description"]
            if units == "metric":
                unit = "ºC"
            elif units == "imperial":
                unit = "ºF"
            else:
                unit = " K"
            return (
                _id, name, timezone, country, sunrise, sunset, temp,
                temp_min, temp_max, humidity, feels_like, description, unit,
            )
        else:
            print(f"E: {request.getcode()}")
    except urllib.error.HTTPError as e:
        print(e)

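# Hypothetical call site for openweather: it returns a 13-item tuple on
# success and falls through (returning None) otherwise, so unpack
# defensively. The city and API key here are placeholders.
result = openweather("London", "metric", "YOUR_API_KEY")
if result:
    (_id, name, timezone, country, sunrise, sunset, temp,
     temp_min, temp_max, humidity, feels_like, description, unit) = result
    print(f"{name}, {country}: {temp}{unit}, {description}")
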
def get_report(self):
    url = 'https://api.darksky.net/forecast/' + self._api.key() + '/' + \
        self._location.lat() + ',' + self._location.lon() + \
        '?lang=' + self.lang() + '&units=si&exclude=daily'
    logging.info('Contacting DarkSky...')
    with urllib.request.urlopen(url) as request:
        if request.getcode() != 200:
            logging.error(f'Failed to reach DarkSky, error code = {request.getcode()}')
            return False
        data = json.loads(request.read())
    logging.info('Data retrieved from DarkSky')
    self._read_json(data)
    return True

def iso3():
    try:
        with open(Path(__file__).parent / "iso3.json") as fp:
            return json.loads(fp.read())
    except (OSError, json.JSONDecodeError):
        try:
            request = urllib.request.urlopen("http://country.io/iso3.json")
            if request.getcode() == 200:
                try:
                    return json.loads(request.read())
                except json.JSONDecodeError:
                    print("E: Couldn't load Json data.")
            else:
                print(f"E: {request.getcode()}")
        except urllib.error.HTTPError:
            print("E: 404, url not found!")

def geturl(url):
    while True:
        request = None  # keep request bound when urlopen raises
        try:
            request = urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            err = e.read()
            status = e.getcode()
        else:
            status = request.getcode()
        if status == 200 and request is not None:
            content = request.read()
            content = content.decode("utf-8")
            return json.loads(content)

def downloadImage(url, path, filename):
    """
    Result :
        [0] = Status
        [1] = Filename
        [2] = URL
        [3] = Full path
        [4] = Start time (time)
        [5] = Finish time (time)
    """
    startTime = time.time()
    res = []
    reqCode = 0
    fileNameTemp = ""
    fullpath = pathCombine(path, filename)
    if checkConnection():
        try:
            request = urllib.request.urlopen(url, timeout=30)
            reqCode = request.getcode()
        except (timeout, socket.error, urllib.error.URLError, urllib.error.HTTPError):
            reqCode = 408
            fileNameTemp = "-"
        if 199 < reqCode < 300:
            if os.getcwd() != path:
                os.chdir(path)
            try:
                downloading, downloadHeader = urllib.request.urlretrieve(url, fullpath)
                fileNameTemp = filename
            except urllib.error.HTTPError as e2:
                # HTTPError must come before URLError, its parent class,
                # or this handler can never run
                reqCode = e2.code
                fileNameTemp = "-"
            except urllib.error.URLError:
                reqCode = 901
                fileNameTemp = "-"
            except urllib.error.ContentTooShortError:
                reqCode = 902
                fileNameTemp = "-"
        else:
            if reqCode == 0:
                reqCode = -1
        finishTime = time.time()
        res = [statusDownload(reqCode), fileNameTemp, url, path, startTime, finishTime]
        return res
    else:
        print("Connection is lost")

def openweather(city, lang, unit, api_key=OPENWEATHER_API_KEY):
    try:
        url = f"{OPENWEATHER_URL}?q={city}&lang={lang}&units={unit}&appid={api_key}"
        request = urllib.request.urlopen(url)
        if request.getcode() == 200:
            data = json.loads(request.read())
            return {
                "name": data["name"],
                "country": iso3().get(data["sys"]["country"]),
                "temp": int(data["main"]["temp"]),
                "unit": check_unit(unit),
                "description": data["weather"][0]["description"],
            }
        else:
            print(f"E: {request.getcode()}")
    except Exception:  # any failure yields None
        return None

def lookup_raw(self, url=None, **kwargs):
    url = url or self.url
    if url is None:
        raise Error('must specify url')
    kwargs['url'] = url
    request_url = (self.endpoint % kwargs) + '?' + urlencode(kwargs)
    try:
        request = urllib.request.urlopen(
            urllib.request.Request(request_url, headers={'User-Agent': 'Python/oEmbed'}))
    except urllib.error.HTTPError as error:
        # HTTPError doubles as a file-like response; Python 3 unbinds the
        # "as" name when the except block ends, so rebind it explicitly for
        # the shared status handling below.
        request = error
    code = request.getcode()
    if code == 200:
        return request.read()
    raise _http_errors.get(code, HTTPError)(code, request_url)

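# A standalone sketch of the trick lookup_raw relies on: HTTPError is itself
# a file-like response object, so getcode() and read() work on it. The
# function name here is hypothetical.
import urllib.error
import urllib.request

def fetch_even_on_error(url):
    try:
        response = urllib.request.urlopen(url)
    except urllib.error.HTTPError as error:
        # rebind: the "as" name is unbound after the except block
        response = error
    return response.getcode(), response.read()
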
def probarPermutacion(palabra, fuentes):
    # print("searching for \"{}\"".format(palabra))
    for fuente in fuentes:
        direccion = fuente + palabra
        # if this source has the permutation, then this is the word.
        request = None
        try:
            request = urllib.request.urlopen(direccion)
            codigo = request.getcode()
        except urllib.error.HTTPError as error:
            codigo = error.code
        finally:
            if request:
                request.close()
        if codigo == 200:
            print("Found \"{}\" at \"{}\".".format(palabra, direccion))
            return True
    return False

async def test_relation(model, app):
    haproxy = model.applications["haproxy"]
    haproxy_unit = haproxy.units[0]
    config = await app.get_config()
    subdomain = config["proxy-subdomain"]["value"]
    address = f"http://{subdomain}.{haproxy_unit.public_address}.xip.io/admin"
    print(f"Checking address: {address}")
    request = urllib.request.urlopen(address)
    info = request.info()
    print(f"Info: {info}")
    assert request.getcode() == 200
    server_id = "not found"
    for item in info.values():
        if "SERVERID" in item:
            server_id = item.split(";")[0]
    print(f"server_id: {server_id}")
    assert subdomain in server_id

def getTimeMap(uri):
    urit = "http://mementoproxy.cs.odu.edu/aggr/timemap/link/" + uri
    try:
        request = urllib.request.urlopen(urit)
        if request.getcode() == 200:
            timemap = request.read()
        else:
            timemap = None
        request.close()
    except (urllib.error.HTTPError, urllib.error.URLError):
        timemap = None
    return timemap

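# Illustrative call for getTimeMap (the URI is arbitrary): it returns the
# raw link-format timemap bytes, or None when anything goes wrong.
timemap = getTimeMap("http://example.com/")
if timemap:
    print(timemap.decode("utf-8", errors="replace")[:200])
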
def get():
    """
    Returns properly formatted weather for Rochester, NY
    City can be changed by grabbing the proper openweathermap.org url.
    """
    weather_string = "Weather Unavailable"
    weather_url = "http://api.openweathermap.org/data/2.1/weather/city/5134086"
    request = urllib.request.urlopen(weather_url)
    weather_info = json.loads(request.read().decode("utf-8"))
    if request.getcode() not in range(200, 300):
        request.close()
        return weather_string
    request.close()
    if weather_info is not None:
        temp = str(k_to_f(weather_info['main']['temp']))
        # state = str(weather_info['weather'][0]['main'])
        desc = str(weather_info['weather'][0]['description'])
        # weather_string = temp + " degrees, " + desc
        weather_string = temp + "°F, " + desc
    return weather_string