def open(self, url, **params):
    """Open ``url`` and return its content, recording the response on ``self``.

    ``http`` URLs that come with query ``params`` are fetched through
    ``requests.get``; everything else goes through ``urlopen``. When
    ``self.cache_type`` is truthy the body is read eagerly, otherwise a
    stream is returned.

    Args:
        url (str): The location to open.
        **params: Query parameters forwarded to ``requests.get``.

    Returns:
        The eagerly read content when ``self.cache_type`` is truthy, else a
        stream (``r.raw``, the re-encoded ``r.fp``, or the response itself).

    Side effects:
        Sets ``self.ext`` from the response content type and ``self.r`` to
        the underlying response object.
    """
    if url.startswith('http') and params:
        r = requests.get(url, params=params, stream=True)
        r.raw.decode_content = self.decode
        response = r.text if self.cache_type else r.raw
    else:
        try:
            r = urlopen(url, context=self.context, timeout=self.timeout)
        except TypeError:
            # NOTE(review): presumably for urlopen versions that do not
            # accept the ``context`` keyword -- confirm.
            r = urlopen(url, timeout=self.timeout)

        # ``None`` means "body not read". An empty body (b'') is still a
        # body that was read, so test identity rather than truthiness.
        text = r.read() if self.cache_type else None

        if self.decode:
            encoding = get_response_encoding(r, self.def_encoding)

            if text is not None:
                response = decode(text, encoding)
            else:
                response = reencode(r.fp, encoding, decode=True)
        else:
            response = r if text is None else text

    content_type = get_response_content_type(r)

    if 'xml' in content_type:
        self.ext = 'xml'
    elif 'json' in content_type:
        self.ext = 'json'
    else:
        # Keep the subtype only, e.g. 'text/csv; charset=utf-8' -> 'csv'.
        self.ext = content_type.split('/')[1].split(';')[0]

    self.r = r
    return response
def open(self, url, **params):
    """Fetch ``url`` and hand back its content, keeping the response on ``self.r``.

    ``http`` URLs accompanied by query ``params`` are requested via
    ``requests.get``; any other call is served by ``urlopen``. With a truthy
    ``self.cache_type`` the body is read up front, otherwise a stream is
    returned.

    Args:
        url (str): The location to open.
        **params: Query parameters passed on to ``requests.get``.

    Returns:
        The content that was read, or a stream when nothing was read.
    """
    if not (url.startswith('http') and params):
        try:
            resp = urlopen(url, context=self.context, timeout=self.timeout)
        except TypeError:
            # Second attempt without the ``context`` keyword.
            resp = urlopen(url, timeout=self.timeout)

        if self.cache_type:
            body = resp.read()
        else:
            body = None

        if not self.decode:
            content = body if self.cache_type else resp
        else:
            charset = get_response_encoding(resp, self.def_encoding)

            if self.cache_type:
                content = decode(body, charset)
            else:
                content = reencode(resp.fp, charset, decode=True)
    else:
        resp = requests.get(url, params=params, stream=True)
        resp.raw.decode_content = self.decode
        content = resp.text if self.cache_type else resp.raw

    self.r = resp
    return content
def test_reencode(self):
    """Check ``io.reencode`` on a file opened as big-endian UTF-16.

    The first two lines are compared as stripped bytes; the third is decoded
    as UTF-8 and compared as text.
    """
    path = p.join(io.DATA_DIR, "utf16_big.csv")

    with open(path, encoding="utf-16-be") as utf16_f:
        lines = io.reencode(utf16_f, remove_BOM=True)

        header = next(lines)
        nt.assert_equal(b"a,b,c", header.strip())

        first_row = next(lines)
        nt.assert_equal(b"1,2,3", first_row.strip())

        # The last row holds a non-ASCII character, so compare it as text.
        last_row = next(lines)
        nt.assert_equal("4,5,ʤ", last_row.decode("utf-8"))
def test_reencode(self):
    """Verify ``io.reencode`` output for a big-endian UTF-16 CSV file.

    Two lines are checked as stripped bytes and the final line is checked
    after decoding it as UTF-8.
    """
    csv_path = p.join(io.DATA_DIR, 'utf16_big.csv')

    with open(csv_path, encoding='utf-16-be') as source:
        reencoded = io.reencode(source, remove_BOM=True)

        line = next(reencoded)
        nt.assert_equal(b'a,b,c', line.strip())

        line = next(reencoded)
        nt.assert_equal(b'1,2,3', line.strip())

        # Final line contains a non-ASCII character; compare it decoded.
        line = next(reencoded)
        nt.assert_equal('4,5,ʤ', line.decode('utf-8'))
def opener(url, memoize=False, delay=0, encoding=ENCODING, params=None, **kwargs):
    """Open ``url`` and return its content together with its content type.

    ``http`` URLs that come with query ``params`` are fetched through
    ``requests.get``; everything else is requested with ``urlopen`` using a
    ``Request`` that carries the default user agent.

    Args:
        url (str): The location to open.
        memoize (bool): Read the whole body up front instead of returning a
            stream.
        delay: When truthy, a ``SleepyDict(delay=delay)`` is passed to
            ``urlopen`` as ``context``.
        encoding (str): Fallback handed to ``get_response_encoding``.
        params (dict): Query parameters forwarded to ``requests.get``.
        **kwargs: ``timeout`` (passed to ``urlopen``) and ``decode`` (whether
            to return decoded content).

    Returns:
        tuple: ``(response, content_type)``.

    Raises:
        URLError: If ``urlopen`` fails. An ``HTTPError`` is re-raised as a
            ``URLError`` whose message includes the url, status code and
            reason.
    """
    params = params or {}
    timeout = kwargs.get('timeout')
    decode = kwargs.get('decode')

    if url.startswith('http') and params:
        r = requests.get(url, params=params, stream=True)
        r.raw.decode_content = decode
        response = r.text if memoize else r.raw
    else:
        req = Request(url, headers={'User-Agent': default_user_agent()})
        context = SleepyDict(delay=delay) if delay else None

        try:
            r = urlopen(req, context=context, timeout=timeout)
        except TypeError:
            # NOTE(review): presumably for urlopen versions that do not
            # accept the ``context`` keyword -- confirm.
            r = urlopen(req, timeout=timeout)
        except HTTPError as e:
            raise URLError(f'{url} returned {e.code}: {e.reason}') from e
        except URLError as e:
            raise URLError(f'{url}: {e.reason}') from e

        # ``None`` means "body not read". An empty body (b'') is still a
        # body that was read, so test identity rather than truthiness.
        text = r.read() if memoize else None

        if decode:
            encoding = get_response_encoding(r, encoding)

            if text is not None:
                response = compat.decode(text, encoding)
            else:
                response = reencode(r.fp, encoding, decode=True)
                # NOTE(review): presumably keeps the underlying response
                # referenced for as long as the stream is -- confirm.
                response.r = r
        else:
            response = r if text is None else text

    content_type = get_response_content_type(r)
    return (response, content_type)
def open(self, url, **params):
    """Open ``url`` and return its content, recording the response on ``self``.

    ``http`` URLs that come with query ``params`` are fetched through
    ``requests.get``; everything else is requested with ``urlopen`` using a
    ``Request`` that carries the default user agent. When
    ``self.cache_type`` is truthy the body is read eagerly, otherwise a
    stream is returned.

    Args:
        url (str): The location to open.
        **params: Query parameters forwarded to ``requests.get``.

    Returns:
        The eagerly read content when ``self.cache_type`` is truthy, else a
        stream (``r.raw``, the re-encoded ``r.fp``, or the response itself).

    Raises:
        URLError: If ``urlopen`` fails. An ``HTTPError`` is re-raised as a
            ``URLError`` whose message includes the url, status code and
            reason.

    Side effects:
        Sets ``self.ext`` from the response content type and ``self.r`` to
        the underlying response object.
    """
    if url.startswith('http') and params:
        r = requests.get(url, params=params, stream=True)
        r.raw.decode_content = self.decode
        response = r.text if self.cache_type else r.raw
    else:
        req = Request(url, headers={'User-Agent': default_user_agent()})

        try:
            r = urlopen(req, context=self.context, timeout=self.timeout)
        except TypeError:
            # NOTE(review): presumably for urlopen versions that do not
            # accept the ``context`` keyword -- confirm.
            r = urlopen(req, timeout=self.timeout)
        except HTTPError as e:
            msg = '{} returned {}: {}'
            raise URLError(msg.format(url, e.code, e.reason))
        except URLError as e:
            raise URLError('{}: {}'.format(url, e.reason))

        # ``None`` means "body not read". An empty body (b'') is still a
        # body that was read, so test identity rather than truthiness.
        text = r.read() if self.cache_type else None

        if self.decode:
            encoding = get_response_encoding(r, self.def_encoding)

            if text is not None:
                response = decode(text, encoding)
            else:
                response = reencode(r.fp, encoding, decode=True)
        else:
            response = r if text is None else text

    content_type = get_response_content_type(r)

    if 'xml' in content_type:
        self.ext = 'xml'
    elif 'json' in content_type:
        self.ext = 'json'
    else:
        # Keep the subtype only, e.g. 'text/csv; charset=utf-8' -> 'csv'.
        self.ext = content_type.split('/')[1].split(';')[0]

    self.r = r
    return response
def open(self, url, **params):
    """Open ``url`` and return its content, recording the response on ``self``.

    ``http`` URLs that come with query ``params`` are fetched through
    ``requests.get``; everything else goes through ``urlopen``. When
    ``self.cache_type`` is truthy the body is read eagerly, otherwise a
    stream is returned.

    Args:
        url (str): The location to open.
        **params: Query parameters forwarded to ``requests.get``.

    Returns:
        The eagerly read content when ``self.cache_type`` is truthy, else a
        stream (``r.raw``, the re-encoded ``r.fp``, or the response itself).

    Raises:
        URLError: If ``urlopen`` fails. An ``HTTPError`` is re-raised as a
            ``URLError`` whose message includes the url, status code and
            reason.

    Side effects:
        Sets ``self.ext`` from the response content type and ``self.r`` to
        the underlying response object.
    """
    if url.startswith('http') and params:
        r = requests.get(url, params=params, stream=True)
        r.raw.decode_content = self.decode
        response = r.text if self.cache_type else r.raw
    else:
        try:
            r = urlopen(url, context=self.context, timeout=self.timeout)
        except TypeError:
            # NOTE(review): presumably for urlopen versions that do not
            # accept the ``context`` keyword -- confirm.
            r = urlopen(url, timeout=self.timeout)
        except HTTPError as e:
            msg = '{} returned {}: {}'
            raise URLError(msg.format(url, e.code, e.reason))
        except URLError as e:
            raise URLError('{}: {}'.format(url, e.reason))

        # ``None`` means "body not read". An empty body (b'') is still a
        # body that was read, so test identity rather than truthiness.
        text = r.read() if self.cache_type else None

        if self.decode:
            encoding = get_response_encoding(r, self.def_encoding)

            if text is not None:
                response = decode(text, encoding)
            else:
                response = reencode(r.fp, encoding, decode=True)
        else:
            response = r if text is None else text

    content_type = get_response_content_type(r)

    if 'xml' in content_type:
        self.ext = 'xml'
    elif 'json' in content_type:
        self.ext = 'json'
    else:
        # Keep the subtype only, e.g. 'text/csv; charset=utf-8' -> 'csv'.
        self.ext = content_type.split('/')[1].split(';')[0]

    self.r = r
    return response