def fetch(furl):
    """Fetch a listing page and queue an apply-URL for each item link on it.

    Loads ``furl`` through ``pq`` (presumably PyQuery -- confirm against the
    file's imports), takes the inner HTML of the ``.maxPicList`` selection,
    feeds it to ``MyHTMLParser`` and, for every link the parser collected
    that matches the link pattern, appends
    ``<scheme><host>/detail/apply/<id>/?callback=?`` to the module-level
    ``myurls`` list.

    Args:
        furl: URL of the page to fetch.

    Returns:
        None. Results are appended to ``myurls``; nothing is appended when
        the selection yields no HTML.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print('fetch ' + furl)

    # Groups: 1 = optional "http://", 2 = host (everything up to the first
    # "/"), 3 = remaining path, 4 = six digits.  The greedy ".*" in group 3
    # means group 4 is the last six-digit run the pattern can reach.
    link_pattern = re.compile(r'(http://)?([^/]*)(/?.*)(\d{6})')

    html = pq(url=furl)('.maxPicList').html()
    if not html:
        return

    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()

    for link in parser.links:
        m = link_pattern.match(link)
        if not m:
            continue
        # group(1) is None when the link has no "http://" prefix;
        # concatenating None would raise TypeError, so fall back to ''.
        scheme = m.group(1) or ''
        myurls.append(
            scheme + m.group(2) + '/detail/apply/' + m.group(4) + '/?callback=?')
def fetch(furl):
    """Fetch a listing page and queue an apply-URL for each item link on it.

    Loads ``furl`` through ``pq`` (presumably PyQuery -- confirm against the
    file's imports), takes the inner HTML of the ``.maxPicList`` selection,
    feeds it to ``MyHTMLParser`` and, for every link the parser collected
    that matches the link pattern, appends
    ``<scheme><host>/detail/apply/<id>/?callback=?`` to the module-level
    ``myurls`` list.

    Args:
        furl: URL of the page to fetch.

    Returns:
        None. Results are appended to ``myurls``; nothing is appended when
        the selection yields no HTML.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print('fetch ' + furl)

    # Groups: 1 = optional "http://", 2 = host (everything up to the first
    # "/"), 3 = remaining path, 4 = six digits.  The greedy ".*" in group 3
    # means group 4 is the last six-digit run the pattern can reach.
    link_pattern = re.compile(r'(http://)?([^/]*)(/?.*)(\d{6})')

    html = pq(url=furl)('.maxPicList').html()
    if not html:
        return

    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()

    for link in parser.links:
        m = link_pattern.match(link)
        if not m:
            continue
        # group(1) is None when the link has no "http://" prefix;
        # concatenating None would raise TypeError, so fall back to ''.
        scheme = m.group(1) or ''
        myurls.append(
            scheme + m.group(2) + '/detail/apply/' + m.group(4) + '/?callback=?')
def fetch(furl):
    """Fetch a listing page and queue an apply-URL for each item link on it.

    Prints the URL followed by the current local time, loads ``furl``
    through ``pq`` (presumably PyQuery -- confirm against the file's
    imports), takes the inner HTML of the ``.maxPicList`` selection, feeds
    it to ``MyHTMLParser`` and, for every link the parser collected that
    matches the link pattern, appends
    ``<scheme><host>/detail/apply/<id>/?callback=?`` to the module-level
    ``myurls`` list.

    Args:
        furl: URL of the page to fetch.

    Returns:
        None. Results are appended to ``myurls``; nothing is appended when
        the selection yields no HTML.
    """
    # strftime() without a time tuple formats the current local time, which
    # is what localtime(time()) produced.  The URL and timestamp are joined
    # without a separator; kept as-is so the log output does not change.
    print('fetch ' + furl + time.strftime('%Y-%m-%d %H:%M:%S'))

    # Groups: 1 = optional "http://", 2 = host (everything up to the first
    # "/"), 3 = remaining path, 4 = six digits.  The greedy ".*" in group 3
    # means group 4 is the last six-digit run the pattern can reach.
    link_pattern = re.compile(r'(http://)?([^/]*)(/?.*)(\d{6})')

    html = pq(url=furl)('.maxPicList').html()
    # NOTE: an earlier revision returned early here when the md5 digest of
    # the HTML equalled the previous fetch's digest; that check is disabled.
    if not html:
        return

    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()

    for link in parser.links:
        m = link_pattern.match(link)
        if not m:
            continue
        # group(1) is None when the link has no "http://" prefix;
        # concatenating None would raise TypeError, so fall back to ''.
        scheme = m.group(1) or ''
        myurls.append(
            scheme + m.group(2) + '/detail/apply/' + m.group(4) + '/?callback=?')
def fetch(furl):
    """Fetch a listing page and queue an apply-URL for each item link on it.

    Prints the URL followed by the current local time, loads ``furl``
    through ``pq`` (presumably PyQuery -- confirm against the file's
    imports), takes the inner HTML of the ``.maxPicList`` selection, feeds
    it to ``MyHTMLParser`` and, for every link the parser collected that
    matches the link pattern, appends
    ``<scheme><host>/detail/apply/<id>/?callback=?`` to the module-level
    ``myurls`` list.

    Args:
        furl: URL of the page to fetch.

    Returns:
        None. Results are appended to ``myurls``; nothing is appended when
        the selection yields no HTML.
    """
    # strftime() without a time tuple formats the current local time, which
    # is what localtime(time()) produced.  The URL and timestamp are joined
    # without a separator; kept as-is so the log output does not change.
    print('fetch ' + furl + time.strftime('%Y-%m-%d %H:%M:%S'))

    # Groups: 1 = optional "http://", 2 = host (everything up to the first
    # "/"), 3 = remaining path, 4 = six digits.  The greedy ".*" in group 3
    # means group 4 is the last six-digit run the pattern can reach.
    link_pattern = re.compile(r'(http://)?([^/]*)(/?.*)(\d{6})')

    html = pq(url=furl)('.maxPicList').html()
    # NOTE: an earlier revision returned early here when the md5 digest of
    # the HTML equalled the previous fetch's digest; that check is disabled.
    if not html:
        return

    parser = MyHTMLParser()
    parser.feed(html)
    parser.close()

    for link in parser.links:
        m = link_pattern.match(link)
        if not m:
            continue
        # group(1) is None when the link has no "http://" prefix;
        # concatenating None would raise TypeError, so fall back to ''.
        scheme = m.group(1) or ''
        myurls.append(
            scheme + m.group(2) + '/detail/apply/' + m.group(4) + '/?callback=?')