def duowanfunc():
    """Fetch one thread page and extract its ID text and photo URLs.

    The page URL is taken from ``rq_girl.get()`` and downloaded with
    ``requests.get``; the first ``<div class="t_fsz">`` element is treated
    as the post to inspect.

    Returns:
        ``None`` when the page has no ``div.t_fsz`` element.
        ``False`` when the post text contains no ``id``/``ID`` keyword
        match, or when the post has no ``<img>`` tag carrying a
        ``zoomfile`` attribute.
        Otherwise a dict ``{'nickname': [...], 'picurls': [...]}`` where
        ``nickname`` is the list of every regex capture found in the post
        text and ``picurls`` holds the ``zoomfile`` values of at most the
        first five images, each passed through ``utils.quote_url``.
    """
    girlurl = rq_girl.get()
    r = requests.get(girlurl)
    soup = BeautifulSoup(r.text, "html.parser")

    girlpage = soup.find('div', attrs={'class': 't_fsz'})
    if not girlpage:
        # Kept as a bare None (not False) so existing callers that
        # distinguish the two outcomes keep working.
        return None

    # Capture the token that follows an "id"/"ID" keyword, skipping any run
    # of whitespace, '|', ':' or ';' characters in between. Raw string so
    # the backslash escapes reach the regex engine untouched.
    p = re.compile(r'(?:id|ID)[\s|:|:|;|;]*(\S+)\s*')
    nickname = p.findall(girlpage.text)
    if not nickname:
        return False

    # Only images that carry a ``zoomfile`` attribute count as photos.
    pics = girlpage.find_all('img', zoomfile=True)
    if not pics:
        return False

    # Keep at most the first five photo URLs.
    piclist = [utils.quote_url(pic.attrs['zoomfile']) for pic in pics[:5]]
    return {'nickname': nickname, 'picurls': piclist}
def duowanlol1():
    """Walk the duowan forum listing pages and hand thread URLs to ``rq``.

    Pages 1 through 999 of the listing URL are requested in order, with a
    one-second pause before each request. Every thread link matched on a
    page is passed through ``utils.quote_url``, prefixed with the forum
    base URL, given to ``rq.put`` and printed. The walk stops early at the
    first page that yields no matching links.

    Returns:
        None.
    """
    # NOTE(review): this name is defined a second time further down the
    # file; at import time the later definition replaces this one.
    pre_url = r"http://bbs.duowan.com/"
    url = "http://bbs.duowan.com/forum.php?mod=forumdisplay&fid=1343&orderby=dateline&typeid=7092&filter=author&orderby=dateline&typeid=7092&page=%s"
    # Compiled once, outside the loop; raw string so "\]" and "\d" reach
    # the regex engine as written.
    p = re.compile(r'求封面</a>\]</em> <span id="thread_\d+?"><a href="(.+?)"')

    for i in range(1, 1000):
        # Pause between requests.
        time.sleep(1)
        r = requests.get(url % i)
        urls = p.findall(r.text)
        # findall() returns an empty list (never None) when nothing
        # matches, so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for x in urls:
            tmp = pre_url + utils.quote_url(x)
            rq.put(tmp)
            print(tmp)
        print('=========duowan1-', i, ' =========')
def qqlol():
    """Walk the bbs.lol.qq.com listing pages and hand thread URLs to ``rq``.

    Pages 1 through 999 of the listing URL are requested in order, with a
    one-second pause before each request. Every thread link matched on a
    page is passed through ``utils.quote_url``, prefixed with the forum
    base URL, given to ``rq.put`` and printed. The walk stops early at the
    first page that yields no matching links.

    Returns:
        None.
    """
    # NOTE(review): this name is defined a second time further down the
    # file; at import time the later definition replaces this one.
    pre_url = r"http://bbs.lol.qq.com/"
    url = "http://bbs.lol.qq.com/forum.php?mod=forumdisplay&fid=205&typeid=966&typeid=966&filter=typeid&page=%d"
    # Compiled once, outside the loop; raw string so "\]" reaches the
    # regex engine as written.
    p = re.compile(r'我要曝照</a>\]</em> <a href="(.+?)"')

    for i in range(1, 1000):
        # Pause between requests.
        time.sleep(1)
        r = requests.get(url % i)
        urls = p.findall(r.text)
        # findall() returns an empty list (never None) when nothing
        # matches, so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for x in urls:
            tmp = pre_url + utils.quote_url(x)
            rq.put(tmp)
            print(tmp)
        print('=========qqlol-', i, ' =========')
def duowanlol1():
    """Walk the duowan forum listing pages and hand thread URLs to ``rq``.

    Pages 1 through 999 of the listing URL are requested in order, with a
    one-second pause before each request. Every thread link matched on a
    page is passed through ``utils.quote_url``, prefixed with the forum
    base URL, given to ``rq.put`` and printed. The walk stops early at the
    first page that yields no matching links.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file; being later, this is the definition in effect.
    pre_url = r"http://bbs.duowan.com/"
    url = "http://bbs.duowan.com/forum.php?mod=forumdisplay&fid=1343&orderby=dateline&typeid=7092&filter=author&orderby=dateline&typeid=7092&page=%s"
    # Compiled once, outside the loop; raw string so "\]" and "\d" reach
    # the regex engine as written.
    link_re = re.compile(r'求封面</a>\]</em> <span id="thread_\d+?"><a href="(.+?)"')

    for page in range(1, 1000):
        # Pause between requests.
        time.sleep(1)
        resp = requests.get(url % page)
        urls = link_re.findall(resp.text)
        # findall() returns an empty list (never None) when nothing
        # matches, so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for href in urls:
            tmp = pre_url + utils.quote_url(href)
            rq.put(tmp)
            print(tmp)
        print('=========duowan1-', page, ' =========')
def qqlol():
    """Walk the bbs.lol.qq.com listing pages and hand thread URLs to ``rq``.

    Pages 1 through 999 of the listing URL are requested in order, with a
    one-second pause before each request. Every thread link matched on a
    page is passed through ``utils.quote_url``, prefixed with the forum
    base URL, given to ``rq.put`` and printed. The walk stops early at the
    first page that yields no matching links.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file; being later, this is the definition in effect.
    pre_url = r"http://bbs.lol.qq.com/"
    url = "http://bbs.lol.qq.com/forum.php?mod=forumdisplay&fid=205&typeid=966&typeid=966&filter=typeid&page=%d"
    # Compiled once, outside the loop; raw string so "\]" reaches the
    # regex engine as written.
    link_re = re.compile(r'我要曝照</a>\]</em> <a href="(.+?)"')

    for page in range(1, 1000):
        # Pause between requests.
        time.sleep(1)
        resp = requests.get(url % page)
        urls = link_re.findall(resp.text)
        # findall() returns an empty list (never None) when nothing
        # matches, so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for href in urls:
            tmp = pre_url + utils.quote_url(href)
            rq.put(tmp)
            print(tmp)
        print('=========qqlol-', page, ' =========')