def method(url, amethod='get', proxies=AUTO_GET_PROXY, *args):
    r'''#TODO: post etc need args
    # proxies
    '''
    U, T, N, F = py.importUTNF()
    try:
        url = N.auto_url(url)
        # grequest is assumed to alias urllib.request; build a Request object
        # so the HTTP verb can be overridden below.
        request = grequest.Request(url)
        request.get_method = lambda: amethod.upper()
        response = grequest.urlopen(request)
        return response
    # except urllib2.HTTPError as eh:
    #     setError(eh)
    except Exception as e:
        setErr(e)
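# Hedged usage sketch for method(): the endpoint is illustrative, and the
# return value is a urllib HTTPResponse rather than a requests.Response.
def _example_method():
    resp = method('httpbin.org/delete', amethod='delete')
    if resp:
        print(resp.status, resp.read(120))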
def auto_proxy_for_requests(proxies, ka):
    '''proxies: a dict or a str both work'''
    U, T, N, F = py.importUTNF()
    proxies = U.get_duplicated_kargs(ka, 'proxies', 'proxy', default=proxies)
    if proxies:
        proxies = N.set_proxy(proxies)
    else:
        if (proxies is AUTO_GET_PROXY) or (py.isNo(proxies) and 'auto get_proxy' in proxies.msg):
            proxies = N.get_proxy(target_protocol=('http', 'https'), )
        else:
            proxies = {}
    ka['proxies'] = proxies
    return proxies, ka

# The rest of this module calls auto_proxy(); assumed to be an alias for this function.
auto_proxy = auto_proxy_for_requests
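# Minimal sketch of the normalization: a 'proxy'/'proxies' kwarg wins over the
# positional argument, and the chosen mapping is written back into ka. The
# proxy address is illustrative.
def _example_auto_proxy():
    ka = {'proxy': 'socks5://127.0.0.1:1080', 'timeout': 9}
    proxies, ka = auto_proxy({}, ka)
    print(proxies, ka['proxies'])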
def request(url, method='GET', headers=gheaders, proxies=AUTO_GET_PROXY,
            verify=False, no_raise=False, print_req=False, **ka):
    ''' '''
    import requests
    U, T, N, F = py.importUTNF()
    proxies, ka = auto_proxy(proxies, ka)
    print_req = U.get_duplicated_kargs(ka, 'show', 'print', 'p', 'print_req',
                                       default=print_req)
    if py.istr(url) and url.upper() in HTTP_METHODS:
        # The first positional argument is actually an HTTP verb;
        # take the real url from ka.
        # ka.pop('method','')  # D.pop(k[,d]) -> v,
        ka['method'] = url
        url = ka['url']  # test url exists
    elif method:
        ka['method'] = method
    if headers:
        ka['headers'] = headers
    # Capitalized str kwargs (e.g. Referer='...') are treated as extra headers
    # (note: this mutates the shared gheaders default).
    for k in py.list(ka):
        v = ka[k]
        if py.istr(k) and py.istr(v):
            if k[0].isupper():
                ka.pop(k)
                headers[k] = v
    ka['verify'] = verify
    if url and 'url' not in ka:
        ka['url'] = url
    ka['url'] = N.auto_url(ka['url'])
    if print_req:
        print(U.v.requests.request(**ka))
    if no_raise:
        try:
            return requests.request(**ka)
        except Exception as e:
            return py.No(e, ka)
    return requests.request(**ka)
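# Hedged usage sketch for request(): the endpoints are illustrative. It shows
# the url/verb swap and the capitalized-kwarg-to-header convention.
def _example_request():
    r = request('https://httpbin.org/get', no_raise=True, Referer='https://example.com')
    if r:
        print(r.status_code)
    # The first positional argument may itself be an HTTP verb:
    r = request('POST', url='https://httpbin.org/post', no_raise=True)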
def post(url, proxies=AUTO_GET_PROXY, **ka):
    r'''
    Signature: requests.post(url, data=None, json=None, **kwargs)
    Docstring:
    Sends a POST request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary (will be form-encoded), bytes, or
        file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    File: e:\qgb\anaconda3\lib\site-packages\requests\api.py
    Type: function
    '''
    U, T, N, F = py.importUTNF()
    url = N.auto_url(url)
    proxies, ka = auto_proxy(proxies, ka)
    import requests
    return requests.post(url, **ka)
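# Hedged sketch: extra kwargs (json=, data=, timeout=, ...) pass straight
# through to requests.post; the url is illustrative.
def _example_post():
    r = post('https://httpbin.org/post', json={'k': 'v'}, timeout=9)
    print(r.status_code, r.json().get('json'))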
def download_seq(url_format, min=0, max=99, headers={}, **ka):
    import requests
    U, T, N, F = py.importUTNF()
    while '{' not in url_format or '}' not in url_format:
        url_format = U.set_input('download_seq.url_format', default=url_format)
    if U.isWin() and U.gst != 'C:/test/':
        U.gst = U.set_input('U.gst', default=U.gst)
    domain = T.get_domain_from_url(url_format)
    file_ext = T.sub_last(url_format, '.')
    if file_ext.lower() not in ['jpg', 'png', 'jpeg']:
        file_ext = U.set_input(url_format + ' file_ext[NO DOT]', default=file_ext)
        if '.' in file_ext:
            raise Exception(file_ext)
    for n in range(min, max):
        url = url_format.format(n)
        file = T.sub(url, domain)
        if file.startswith('/'):
            file = file[1:]
        f = r'{gst}{domain}/{file}'.format(
            gst=U.gst,
            domain=domain,
            file=T.filename_legalized(file),
        )
        if F.size(f):
            print('#' * 9, 'Exist', f)
            continue
        response = N.HTTP.requests(url, headers=headers, verify=False, no_raise=1)
        if not response or response.status_code != 200:
            print(repr(response).strip(), py.getattr(response, 'url', ' No'))
            return response
        b = response.content
        print(U.sizeof(b), F.write(f, b, mkdir=1))
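# Hedged sketch: downloads an illustrative numbered sequence; files land under
# U.gst/<domain>/ exactly as the loop above builds the paths.
def _example_download_seq():
    download_seq('https://example.com/img/{}.jpg', min=1, max=5)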
# coding=utf-8
import sys, pathlib

gsqp = pathlib.Path(__file__).absolute().parent.parent.__str__()  # *.py /qgb /[gsqp]
if gsqp not in sys.path:
    sys.path.append(gsqp)  # py3 works
from qgb import py
U, T, N, F = py.importUTNF()

import asyncio


def patch_nest_asyncio():
    import nest_asyncio
    return nest_asyncio.apply()
nest = nest_asyncio = patch_nest_asyncio


async def sleep(sec):
    return await asyncio.sleep(sec)


async def websocket_client_send(url, astr):
    import websockets
    url = N.auto_url(url, default_protocol='ws')
    async with websockets.connect(url) as ws:
        await ws.send(astr)
        r = await ws.recv()
        return r
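# Hedged sketch: sends one message and returns the first reply. The public
# echo endpoint is illustrative and may be unavailable.
def _example_ws_echo():
    return asyncio.run(websocket_client_send('echo.websocket.events', 'hello'))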
def get(url, file='', headers=gheaders, timeout=9, proxies=AUTO_GET_PROXY,
        encoding='', show=False, verify=False, **ka):
    U, T, N, F = py.importUTNF()
    url = N.auto_url(url)
    show = U.get_duplicated_kargs(ka, 'show', 'print', 'p', 'print_req', default=show)
    proxies, ka = auto_proxy(proxies, ka)

    def writeFile():  # note: defined but not called below
        if file:
            content = r.content
            # if content:
            return F.write(file, content)
            # else:
            #     return py.No('response.content is Null!')

    if show:
        print(U.v.requests.get(url, verify=verify, timeout=timeout,
                               headers=U.StrRepr(U.pformat(headers)),
                               proxies=proxies))
    b = b''
    try:
        import requests
        r = requests.get(url, verify=verify, timeout=timeout, headers=headers,
                         proxies=proxies)
        if file:
            u = T.url_split(url).path
            u = T.sub_last(u, '/')
            if py.isbool(file):
                file = u
            elif F.isdir(file):
                file = file + u
            return F.write(file, r.content)  # TODO decode
        if 'text' in r.headers.get('Content-Type', '').lower():
            return r.content.decode(encoding or T.detect(r.content[:9999]) or 'utf-8')
            # try: return r.content.decode('gb18030')
            # except: pass
            # return r.text
        else:
            b = r.content
    except ModuleNotFoundError:
        # Fall back to urllib when requests is not installed.
        try:
            b = grequest.urlopen(url).read()
        except Exception as e:
            return py.No(url, e)
        #####################
    except Exception as e:
        return py.No(e)
    # try:
    encoding = T.detect(b[:9999])
    # raise Exception('decode error')
    if b and encoding:
        return b.decode(encoding)
    return b
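# Hedged sketch of get()'s return shapes: decoded text for text/* responses,
# raw bytes otherwise, and F.write()'s result when file is given. Urls and
# the local path are illustrative.
def _example_get():
    html = get('https://httpbin.org/html')            # str
    raw = get('https://httpbin.org/bytes/16')         # bytes
    saved = get('https://httpbin.org/image/png', file='C:/test/a.png')
    print(type(html), type(raw), saved)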
def get_bytes(url, file='', headers=gheaders, timeout=9, proxies=AUTO_GET_PROXY,
              verify=False, print_req=False, **ka):
    r'''
    When the url format is wrong:

    C:\QGB\Anaconda3\lib\site-packages\socks.py in _write_SOCKS5_address(self, addr, file)
        574                                            socket.SOCK_STREAM,
        575                                            socket.IPPROTO_TCP,
    --> 576                                            socket.AI_ADDRCONFIG)
        577         # We can't really work out what IP is reachable, so just pick the
        578         # first.

    C:\QGB\Anaconda3\lib\socket.py in getaddrinfo(host, port, family, type, proto, flags)
        746     # and socket type values to enum constants.
        747     addrlist = []
    --> 748     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
        749         af, socktype, proto, canonname, sa = res
        750         addrlist.append((_intenum_converter(af, AddressFamily),

    UnicodeError: encoding with 'idna' codec failed (UnicodeError: label too long)
    '''
    U, T, N, F = py.importUTNF()
    url = N.auto_url(url)
    file = U.get_duplicated_kargs(ka, 'file', 'f', 'filename', default=file)
    write_zero = U.get_duplicated_kargs(ka, 'write0', 'w0', 'write_zero', 'zero', default=False)
    print_req = U.get_duplicated_kargs(ka, 'show', 'print', 'p', 'print_req', default=print_req)
    proxies, ka = auto_proxy(proxies, ka)
    import requests
    try:
        if print_req:
            print(U.v.requests.get(url, verify=verify, timeout=timeout,
                                   headers=U.StrRepr(U.pformat(headers)),
                                   proxies=proxies))
        b = requests.get(url, verify=verify, timeout=timeout, headers=headers,
                         proxies=proxies).content
        f = repr(b[:77])[2:-1]
        if file and (b or write_zero):
            f = F.write(file, b)
        return U.object_custom_repr(b, repr='{}{}'.format(F.readable_size(b), f))
    except Exception as e:
        return py.No(e)
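# Hedged sketch: the return value is the raw bytes wrapped so its repr shows
# the size plus either a content preview or the written file path.
def _example_get_bytes():
    b = get_bytes('https://httpbin.org/bytes/1024')
    print(repr(b))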
def thread_pool_request(targets, max_workers=None, request_ka={}, print_log=False, **ka):
    '''
    def __init__(self, max_workers=None, thread_name_prefix='',
                 initializer=None, initargs=()):
        """Initializes a new ThreadPoolExecutor instance.

        Args:
            max_workers: The maximum number of threads that can be used to
                execute the given calls.
            thread_name_prefix: An optional name prefix to give our threads.
            initializer: A callable used to initialize worker threads.
            initargs: A tuple of arguments to pass to the initializer.
        """
        if max_workers is None:
            # Use this number because ThreadPoolExecutor is often
            # used to overlap I/O instead of CPU work.
            max_workers = (os.cpu_count() or 1) * 5

    timeout
    '''
    import concurrent.futures
    U, T, N, F = py.importUTNF()
    r = []
    max_workers = U.get_duplicated_kargs(ka, 'max_thread', 'threads', 'thread_count',
                                         default=max_workers)
    print_log = U.get_duplicated_kargs(ka, 'P', 'print', 'p', default=print_log)
    pool = U.get_or_set('U.ThreadPoolExecutor', lazy_default=lambda: U.ThreadPoolExecutor())
    if print_log:
        U.pprint(pool._threads)
    # Reset the cached executor so it can be reused after a previous
    # `with pool:` block shut it down.
    pool._threads.clear()
    pool._shutdown = False
    # pool._max_workers = max_workers
    with pool:
        future_to_url = {pool.submit(request, url, **request_ka): url for url in targets}
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                data = future.result()
                r.append([U.StrRepr(url, size=15), data, data.elapsed.total_seconds()])
            except Exception as exc:
                if print_log:
                    print('### %r %s' % (url, exc))
            else:
                if print_log:
                    print('%r %s' % (url, data))
    print(U.stime(), '### len', U.len(targets, r))
    return r
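# Hedged sketch: fans urls out through the shared executor; each result row is
# [short url repr, Response, elapsed seconds]. The urls are illustrative.
def _example_thread_pool():
    urls = ['https://httpbin.org/delay/%d' % n for n in range(3)]
    for short_url, resp, secs in thread_pool_request(urls, request_ka={'timeout': 9}):
        print(short_url, resp.status_code, secs)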
def download(url, file_path='', default_dir=py.No('set_input', no_raise=1),
             headers=None, proxies=AUTO_GET_PROXY, **ka):
    import requests, sys, os
    U, T, N, F = py.importUTNF()
    proxies, ka = auto_proxy(proxies, ka)
    if not headers:
        headers = {}
    if not F.isabs(file_path) and not default_dir:
        default_dir = 'D:/'
        if U.is_linux() or U.is_mac():
            default_dir = U.gst + 'download/'
        default_dir = U.get_or_input('download.default_dir', default=default_dir)
    if not file_path:
        file_path = default_dir + T.filename_legalized(url.split('/')[-1])
    print('save_to:', file_path)
    # Suppress urllib3 warnings:
    # requests.packages.urllib3.disable_warnings()
    # The first request is only used to get the total file size.
    r1 = requests.get(url, stream=True, verify=False, headers=headers, proxies=proxies)
    total_size = int(r1.headers['Content-Length'])
    # Check how much of the file has already been downloaded locally.
    if os.path.exists(file_path):
        temp_size = os.path.getsize(file_path)  # size already on disk
    else:
        temp_size = 0
    # Show current progress.
    print(temp_size, '/', total_size)
    # Core of the resume logic: ask the server to continue from the bytes
    # the local file already has.
    headers['Range'] = 'bytes=%d-' % temp_size
    # Re-request the url with the new Range header.
    r = requests.get(url, stream=True, verify=False, headers=headers, proxies=proxies)
    # Note the "ab" mode below: append to the existing partial file.
    with open(file_path, "ab") as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                temp_size += len(chunk)
                f.write(chunk)
                f.flush()
                # Render a simple progress bar.
                done = int(50 * temp_size / total_size)
                sys.stdout.write("\r[%s%s] %d%%" % ('█' * done, ' ' * (50 - done),
                                                    100 * temp_size / total_size))
                sys.stdout.flush()
    print()  # newline after the \r carriage returns above
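# Hedged sketch: running download() twice on the same path resumes from the
# bytes already on disk via the Range header above. Url and path illustrative.
def _example_download():
    download('https://httpbin.org/bytes/102400', file_path='C:/test/blob.bin')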
def download_one_page_list(url, headers={}, **ka):
    # TODO: not implemented yet
    U, T, N, F = py.importUTNF()
    return