def rty(rows=None, **ka):
    if not rows:
        rows = grows
    rt = ''
    # parse the column-filter mini-language from the keyword names:
    # a digit in a key selects a column; a '_' right before the digit
    # stores the filter under a negative key (exclusion match)
    dcol_range = {}
    for k in py.list(ka):
        for nc, c in py.enumerate(k):
            if c in T._09:
                i = py.int(c)
                v = ka.pop(k)
                if py.isnum(v):
                    dcol_range[i] = [v, U.IMAX]
                elif py.istr(v):
                    if k[nc - 1] == '_':
                        dcol_range[-i] = v
                    else:
                        dcol_range[i] = v
                elif py.len(v) == 2:
                    dcol_range[i] = v
                else:
                    raise py.ArgumentError('col range must be int or [a, b]')
                break
    if dcol_range:
        print(' dcol_range:', dcol_range)
    for index, row in enumerate(U.sort(rows, **ka)):
        _continue = 0
        for ic, ran in dcol_range.items():
            # rt += f'# {row}[{ic}]<hr>'
            if ic >= 0 and py.istr(row[ic]) and py.istr(ran):
                if ran not in row[ic]:
                    _continue = 1
            if ic < 0 and py.istr(row[ic]) and py.istr(ran):
                if ran in row[ic]:
                    _continue = 1
            if py.isnum(row[ic]) and py.isnum(ran[0]):
                if not ran[0] <= row[ic] <= ran[1]:
                    _continue = 1
        if _continue:
            continue
        if 'item.taobao.com/item.htm?id=' in row[-3]:
            row[-3] = T.sub(row[-3], 'id=', '')
        tb = f'''{row[-1]} <span>{row[:4]}</span> <br>
<a target="_blank" href="taobao://item.taobao.com/item.htm?id={row[-3]}">{'%4s' % index} {row[-2]} </a>
<br><hr>
'''
        rt += tb
    return rt
def request(url, method='GET', headers=gheaders, proxies=AUTO_GET_PROXY, verify=False,
            no_raise=False, print_req=False, **ka):
    import requests
    U, T, N, F = py.importUTNF()
    proxies, ka = auto_proxy(proxies, ka)
    print_req = U.get_duplicated_kargs(ka, 'show', 'print', 'p', 'print_req',
                                       default=print_req)
    if py.istr(url) and url.upper() in HTTP_METHODS:
        # the first positional argument is actually the HTTP method,
        # so the real url must be supplied as ka['url']
        ka['method'] = url
        url = ka['url']
    elif method:
        ka['method'] = method
    if headers:
        ka['headers'] = headers
    # keyword arguments whose names start with an uppercase letter are
    # moved into the request headers, e.g. Referer='...'
    for k in py.list(ka):
        v = ka[k]
        if py.istr(k) and py.istr(v) and k[0].isupper():
            ka.pop(k)
            headers[k] = v
    ka['verify'] = verify
    if url and 'url' not in ka:
        ka['url'] = url
    ka['url'] = N.auto_url(ka['url'])
    if print_req:
        print(U.v.requests.request(**ka))
    if no_raise:
        try:
            return requests.request(**ka)
        except Exception as e:
            return py.No(e, ka)
    return requests.request(**ka)
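# A minimal usage sketch for request() (the url and Referer values are
# placeholders): uppercase keyword arguments become HTTP headers, and
# no_raise=True returns a falsy py.No wrapper instead of raising.
def _demo_request():
    r = request('https://example.com', Referer='https://example.com/page', no_raise=True)
    if r:
        print(r.status_code, py.len(r.content))
    return r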
def setIndex(indexName):
    '''The first index name set in this process always wins
    (reset manually with sys.gsIndex='').'''
    if not py.istr(indexName):
        raise py.ArgumentError('indexName must be str')
    # keep the first value stored under 'es_index'; later calls reuse it
    s = U.get_or_set('es_index', indexName)
    if s:
        globals()['gsIndex'] = s
def route(path="/", method=["GET"], h=True, args=True): if not args: h = False if py.istr(method): method = method.upper() if method in register_route: method = [method] else: raise Exception('unsupported method:' + method) def decorator(f): for m in method: m = m.upper() try: if h: register_route[m][path] = f else: def wrap(handler, *a, **ka): return f(*a, **ka) register_route[m][path] = wrap #lambda handler:f() except: logging.error("{0} method is not available.".format(m)) return f return decorator
def receive(request):
    data = T.json_loads(request.get_data())
    if not py.istr(data):
        # non-str payloads are persisted with dill and recorded as errors
        F.dill_dump(file=cache_path + 'dill/' + gid, obj=data)
        return U.set_dict_value_list(did_err, gid, data)
    if gid in did:
        # gid already seen: collect the duplicate in did2
        return U.set_dict_value_list(did2, gid, data)
    did[gid] = F.write(cache_path + gid + '.html', data)
    return gid
def get_one_article(pi):
    if py.istr(pi) and '/p/' in pi:
        # accept a full zhihu article url and keep only the id after '/p/'
        pi = T.sub(pi, '/p/', '')
    if py.isint(pi):
        pi = py.str(pi)
    h = N.HTTP.getBytes("https://rss.lilydjwg.me/static_zhihu/" + pi)
    h = h.decode("utf8")
    t = T.html2text(h)
    t = T.filterZh(t, 11)
    return t
async def chrome_devtools_protocol_send(url, astr, params={}, id=0):
    if not id:
        id = U.ct()
    if py.istr(astr):
        astr = astr.strip()
        if not (astr.startswith('{') and astr.endswith('}')):
            # a bare method name like 'Page.navigate' is wrapped
            # into a full CDP message
            astr = py.dict(
                id=id,
                method=astr,
                params=params,
            )
    if py.isdict(astr):
        astr = T.json_dumps(astr)
    return astr, await websocket_client_send(url, astr)
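# A minimal usage sketch; ws_url must be a page's webSocketDebuggerUrl
# taken from the browser's http://127.0.0.1:9222/json endpoint, and the
# target url below is a placeholder.
async def _demo_cdp_navigate(ws_url):
    return await chrome_devtools_protocol_send(
        ws_url, 'Page.navigate', params={'url': 'https://example.com'})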
def replace_raw(raw_text, regex=True):
    for i, v in gdraw.items():
        if isinstance(i, tuple) and len(i) == 2:
            # (contains-check, regex) pair: only applied when regex=True
            if not regex:
                continue
            if i[0] in raw_text:
                raw_text = T.regexReplace(raw_text, i[1], str(v))
        else:
            if py.istr(i):
                raw_text = raw_text.replace(str(i), str(v))
            else:
                U.log(['unknown gdraw key', i, v])
    return raw_text
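# A minimal usage sketch for replace_raw(); the '{name}' key is a
# placeholder. A plain-str key in gdraw is applied with str.replace,
# while a 2-tuple key goes through T.regexReplace.
def _demo_replace_raw():
    gdraw['{name}'] = 'qgb'
    return replace_raw('hello {name}')  # -> 'hello qgb'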
def zhihu_question(id):
    global c, next_url, answers, html
    if py.istr(id):
        # extract an 8-10 digit question id from the string
        id = T.filterInt(id, range(8, 11))[0]
    start_url = 'https://www.zhihu.com/api/v4/questions/' + str(id) + '/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics&limit=5&offset=0&sort_by=default'
    import requests
    from bs4 import BeautifulSoup
    import json
    headers = {
        'accept-language': 'zh-CN,zh;q=0.9',
        'origin': 'https://www.zhihu.com',
        'referer': 'https://www.zhihu.com/question/290268306',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    next_url = [start_url]
    answers = []
    # next_url grows while iterating, so this walks all paginated pages
    for url in next_url:
        try:
            html = requests.get(url, headers=headers, verify=False)
            html.encoding = html.apparent_encoding
            soup = BeautifulSoup(html.text, "lxml")
            content = str(soup.p).split("<p>")[1].split("</p>")[0]
            c = json.loads(content)
            answers += pq(c)
            next_url.append(c["paging"]["next"])
            if c["paging"]["is_end"]:
                break
        except Exception as e:
            globals()['err'] = e
            continue
    title = ''
    try:
        title = c['data'][0]['question']['title'] + f' | {len(answers)}'
    except Exception:
        pass
    U.log(title)
    r = ''
    for i, v in enumerate(answers):
        r += f'\n[第{i+1}] {T.filterZh(v[0], 11)} {v[1]}'
    return title + r
def open_private_key(f):
    '''paramiko.RSAKey.from_private_key(open_private_key(f))
    == <paramiko.rsakey.RSAKey at 0x23218fda648>
    '''
    if py.isfile(f):
        file = f
    elif not py.istr(f):
        raise py.ArgumentError('key f type must be file or str', f)
    elif F.exist(f):
        file = py.open(f)
    elif py.len(f) < 256 and 'PRIVATE KEY' not in f.upper():
        raise py.ArgumentError(
            'private_key f str format error! Or [%s] does not exist' % f)
    else:
        # f is the PEM text itself; wrap it in a file-like object
        from io import StringIO
        file = StringIO(f)
    return file
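# A minimal usage sketch matching the docstring above; the key path is
# a placeholder, and a PEM string would work just as well.
def _demo_open_private_key():
    import paramiko
    return paramiko.RSAKey.from_private_key(open_private_key('C:/test/id_rsa'))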
async def press_keys(page, *s, xy=(None, None)):
    from qgb import Win
    if len(s) == 1 and py.istr(s[0]):
        key_defs = U.get(u_key_defs)
        if not key_defs:
            # cache all multi-character key names, e.g. 'Enter', 'ArrowLeft'
            key_defs = []
            for k, v in pyppeteer.us_keyboard_layout.keyDefinitions.items():
                if py.len(k) > 1:
                    key_defs.append(k)
            U.set(u_key_defs, key_defs)
        # cache end
        if s[0] not in key_defs:
            # a plain word is expanded into its single characters;
            # a known key name such as 'Enter' stays as one key
            s = s[0]
    xy = Win.click(*xy)
    for k in s:
        await page.keyboard.press(k)
    return s, xy
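# A minimal usage sketch, assuming a pyppeteer page object: a known key
# name like 'Enter' is pressed once, while a plain word such as 'hi'
# would be expanded into single-character presses.
async def _demo_press_keys(page):
    return await press_keys(page, 'Enter')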
def taobao_sku(d):
    '''item html:
    Hub.config.set('sku', {
        valCartInfo : {
            itemId : '355190223
            cartUrl: '//cart.ta
        },
        apiRelateMarket : '//t
        apiAddCart : '//c
        apiInsurance : '',
        wholeSibUrl : '//d
        areaLimit : '',
        bigGroupUrl : '',
        valPostFee : '',
        coupon : {
            couponApi :
            couponWidgetDomain:
            cbUrl :
        },
        valItemInfo : {      # starts around line 1424 of the item html
            defSelected: -1,
            skuMap : {";162}
            ,propertyMemoMap: {...}
        }                    # ends here
    '''
    if py.istr(d):
        d = T.load_js_obj(d)
    m = d['propertyMemoMap']
    skuMap = d['skuMap']
    r = []
    for i, name in m.items():
        # e.g. {'oversold': False, 'price': '1.80', 'skuId': '4451749127064', 'stock': '2'}
        pi = skuMap[';{};'.format(i)]
        row = [
            U.FloatRepr(pi['price'], size=8),
            U.StrRepr(i, size=20),
            U.StrRepr(name),
        ]
        if pi['oversold']:
            row.append('oversold')
        r.append(row)
    return r
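# A minimal usage sketch with a hand-built dict that mirrors the
# skuMap/propertyMemoMap structure documented above; all values are
# placeholders.
def _demo_taobao_sku():
    d = {
        'propertyMemoMap': {'1627207:28320': 'red'},
        'skuMap': {';1627207:28320;': {
            'oversold': False, 'price': '1.80',
            'skuId': '4451749127064', 'stock': '2'}},
    }
    return taobao_sku(d)  # -> one row per sku: [price, property id, name]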
def search2xls(body, index=gsIndex,
               fields=('url', 'title', 'channel', 'column_classify', 'datetime'),
               max_str_len=555):
    '''max_str_len truncation guards against:
    Exception: String longer than 32767 characters
    '''
    zh = T.filter_zh(py.repr(body))
    es_data = es.search(index=index, body=body)['hits']['hits']
    r = []
    for i in es_data:
        s = i['_source']
        row = []
        for c in fields:
            c = s[c]
            if py.istr(c):
                c = c[:max_str_len]
            row.append(c)
        r.append(row)
    return F.write_xls('qgb-es-{}-{}.xls'.format(zh, py.len(r)), r)
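# A minimal usage sketch; the match_all query is a placeholder and
# assumes the module-level `es` client and gsIndex are configured, and
# that the indexed documents contain the default fields.
def _demo_search2xls():
    body = {'query': {'match_all': {}}, 'size': 10}
    return search2xls(body)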
def load(s, skip_err_load=0):
    if py.istr(s):
        ls = [i.strip() for i in s.split(',')]
    if py.islist(s):
        ls = s
    print(ls)
    r = []
    dlogin = U.get_or_set('lc.login', {})
    for i in ls:
        if not i:
            continue
        f = r'C:/test/{}@qgbcs.uu.me.dill'.format(i)
        q = F.dill_load(f)
        if not q:
            print('#Err', f, repr(q))
            if skip_err_load:
                continue
            else:
                raise q
        dlogin[i] = q
        r.append(q)
    return r
async def get_page(page=None, url=py.No(URL_TRADE_LIST), wait=None, browser=None):
    global JS_ONLOAD
    # py.pdb()()
    if py.istr(page) and not url:  # and ('://' in page)
        # a str first argument is actually a url
        url, page = page, None
    if not browser:
        browser = await get_browser()
    if not page and not url:
        return (await browser.pages())[-1]
    if not url:
        url = URL_TRADE_LIST
    if not wait and not page:
        page = await _get_page_by_url(browser, url)
    if wait and not page:
        start = U.timestamp()
        while not page:
            page = await _get_page_by_url(browser, url)
            await A.sleep(py.max(0.1, wait / 10))
            if U.timestamp() - start > wait:
                return py.No('%s sec timeout! cannot find page. url:%s' % (wait, url))
    return U.set('page', page)
def filter_args(args_dict):
    '''pymysql.connect(**ka) where ka = {
        host(str):    MySQL server address
        port(int):    MySQL server port
        user(str):    username
        passwd(str):  password
        db(str):      database name
        charset(str): connection charset
    }
    '''
    r = {}
    for k, v in args_dict.items():
        if not py.istr(k):
            continue
        k = k.lower()
        if k == 'port':
            v = py.int(v)
        if k == 'PASSWORD'.lower():
            k = 'passwd'
        if k == 'NAME'.lower():
            k = 'db'
        if k in gargs_list:
            r[k] = v
    return r
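# A minimal usage sketch mapping Django-style DATABASES settings onto
# pymysql arguments (values are placeholders; gargs_list is assumed to
# hold the accepted pymysql.connect parameter names).
def _demo_filter_args():
    django_db = {'HOST': '127.0.0.1', 'PORT': '3306', 'USER': 'root',
                 'PASSWORD': 'secret', 'NAME': 'mydb'}
    return filter_args(django_db)  # -> {'host': ..., 'port': 3306, 'passwd': ..., 'db': ...}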