def Get_href(self,url):
    """Fetch a listing page and enqueue every link not recorded before.

    The page at ``url`` is downloaded (first without a proxy, then through a
    newly chosen proxy after each failure), decoded from GBK and re-encoded
    as UTF-8.  Every anchor/date pair matched by the pattern below is turned
    into an absolute link; links not yet recorded in ``href.txt`` are pushed
    to the ``yingjiesheng_href1`` queue as ``<href>BBD<date>`` and appended
    to ``href.txt`` (href line followed by date line).

    Args:
        url: address of the listing page to scan.

    Raises:
        Exception: when the page could not be downloaded in 10 attempts.

    Errors raised while parsing, queueing or writing are logged and printed
    but not propagated (best effort, as before).
    """
    queue = RedisQueue_master1.getredisQueuev2('yingjiesheng_href1')

    # --- download with retries -------------------------------------------
    max_attempts = 10
    proxy = None
    myPage = None
    for attempt in range(1, max_attempts + 1):
        try:
            myPage = webutil.request(url, timeout=10, proxy=proxy).decode('gbk').encode('utf-8')
            break
        except Exception as e3:
            print(e3)
            if attempt == max_attempts:
                # Give up after exactly the number of failures the message
                # announces (the old counter allowed one extra attempt).
                raise Exception(u"连续10次失败,放弃")
            # Switch to a different proxy before the next attempt.
            proxy = proxyutils.choice_proxy(is_debug=False, host="master1", port=8880)
            time.sleep(1)

    # --- extract, de-duplicate, enqueue ----------------------------------
    try:
        pp = re.findall(
            '<a href="(.*?)" target="_blank">'
            '<span style="color:(.*?);">(.*?)</a>'
            '(.*?)</td>(.*?)'
            '<td class="date">(.*?)</td>',
            myPage,
            re.S,
        )
        # 'a+' creates the file when missing and appends on write; the
        # context manager guarantees the handle is closed on every path.
        with open('href.txt', 'a+') as f1:
            # Load the already-recorded lines ONCE.  Calling read() inside
            # the loop only returned data the first time, so later links
            # were never actually checked against the file.
            f1.seek(0)
            seen = set(line.strip() for line in f1)
            # Reposition before switching from reading to writing.
            f1.seek(0, 2)

            for i in pp:
                # The lazy first group can swallow an earlier anchor; keep
                # only what follows the last href=" it contains.
                p = i[0].split('href="')[-1]
                if 'http://' not in p:
                    href = 'http://www.yingjiesheng.com' + p
                else:
                    href = p

                if href in seen:
                    continue
                seen.add(href)  # also filters repeats within this page

                print('%s %s' % (href, i[-1]))
                queue.put(href + 'BBD' + i[-1])
                f1.write(href)
                f1.write('\r\n')
                f1.write(i[-1])
                f1.write('\r\n')
        print(len(pp))
    except Exception as e2:
        logging.error("product id:%s" % e2)
        print(e2)
def run():
    """Drain the ``yingjiesheng_href1`` queue and process each entry.

    Every non-empty entry is split at its last ``BBD`` separator into a URL
    and a second field, which are handed to ``Get_message(url, date)`` on a
    ``Get_Message`` instance.  Processing stops at the first exception, which
    is printed and logged together with the URL being handled at that moment
    (``None`` if no entry had been unpacked yet).
    """
    ss = Get_Message()
    # Bound up front so the error handler below can always format it, even
    # when the failure happens before the first entry is unpacked.
    url = None
    try:
        queue = RedisQueue_master1.getredisQueuev2('yingjiesheng_href1')
        while not queue.empty():
            item = queue.getv2()
            if not item:
                continue
            # Split at the LAST separator so a URL that itself contains
            # 'BBD' stays intact.
            parts = item.rsplit('BBD', 1)
            url = parts[0]
            print(url)
            date = parts[1]
            ss.Get_message(url, date)
    except Exception as e2:
        print(e2)
        logging.error("url:%s%s" % (url, time.ctime()))