Example No. 1
            # this fragment begins mid-function; the enclosing control flow was cut off above
            browser.reload()
        browser.click_link_by_text('查找')  # '查找' = "Search"
        # keep reloading until the talent ranking list tab has rendered
        while not browser.is_element_present_by_css('.talent_rankinglist .tab .cur a', wait_time=10):
            print 'prov and city page reload'
            browser.reload()

        baseurl = browser.url
        for i in range(1, 11):  # pages 1 through 10
            url = baseurl + '&page=' + str(i) + '&'
            print url
            # retry until the page loads without raising
            while True:
                try:
                    browser.visit(url)
                    break
                except Exception:
                    continue
            soup = BeautifulSoup(browser.html)
            ol = soup.find('ol')
            if ol is None:
                print 'no such page'
                break
            soup = BeautifulSoup(str(ol))
            for child in soup.find_all('a', {'class': 'name'}):
                try:
                    # the numeric uid is embedded in the avatar URL: ...sinaimg.cn/<uid>/...
                    uid = re.search(r'sinaimg.cn/([0-9]*)/', soup.find('img', {'title': child.string}).get('src')).group(1)
                    print uid
                    daren_uids.intappend(uid)  # intappend() appears to be a project-specific list helper
                except Exception:
                    pass
        print city[1].encode('utf-8') + ' is Done'
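
The fragment above is cut from the middle of a larger splinter-driven crawler, so names such as browser, daren_uids and city come from the surrounding code. As a self-contained illustration of the same technique (paging through the ranking list with splinter, then pulling the numeric UID out of each sinaimg.cn avatar URL with BeautifulSoup and a regex), a minimal sketch might look like the following; the function name collect_uids, the plain-list accumulator and the 10-page limit are assumptions, not part of the original project.

import re
from bs4 import BeautifulSoup
from splinter import Browser

UID_RE = re.compile(r'sinaimg\.cn/([0-9]+)/')

def collect_uids(base_url, max_pages=10):
    """Visit base_url page by page and pull numeric UIDs out of avatar image URLs."""
    uids = []
    with Browser() as browser:                  # splinter's default Firefox driver
        for page in range(1, max_pages + 1):
            browser.visit(base_url + '&page=%d&' % page)
            soup = BeautifulSoup(browser.html, 'html.parser')
            ol = soup.find('ol')
            if ol is None:                      # no ranking list on this page: stop paging
                break
            for link in ol.find_all('a', {'class': 'name'}):
                img = ol.find('img', {'title': link.string})
                if img is None:
                    continue
                match = UID_RE.search(img.get('src', ''))
                if match:
                    uids.append(int(match.group(1)))
    return uids

Searching inside the <ol> rather than the whole document keeps avatars elsewhere on the page from being matched, which is what the original achieves by re-parsing str(ol).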

Example No. 2
        continue  # this fragment begins mid-loop; the enclosing control flow was cut off above

# a title of exactly 18 characters presumably means the Weibo homepage has not finished loading
while len(browser.title) == 18:
    print "wait for homepage"
    time.sleep(0.5)

for letter in range(97, 123):  # ord('a') .. ord('z'): one fame category per letter
    for prov in SPROV:  # SPROV is defined elsewhere; prov[1] is the province code used in the URL
        for pages in range(1, 11):  # pages 1 through 10
            url = "http://verified.weibo.com/fame/%s/?rt=4&srt=3&province=%s&page=%s" % (
                chr(letter),
                prov[1],
                str(pages),
            )
            print url
            browser.visit(url)
            # keep reloading until the category list has rendered
            while not browser.is_element_present_by_css(".categories_list .titlebar", wait_time=10):
                print "page reload"
                browser.reload()
            soup = BeautifulSoup(browser.html)
            # the "detail" div holds hidden <input> elements whose value is a user's uid
            soup = BeautifulSoup(str(soup.find("div", {"class": "detail"})))
            for i in soup.find_all("input"):
                print i["value"]
                famous_uids.intappend(i["value"])  # intappend() appears to be a project-specific list helper
            soup = BeautifulSoup(browser.html)
            ifnext = soup.find("div", {"class": "W_pages W_pages_comment"})  # pagination bar

            if str(ifnext).find("下一页") == -1:  # no "下一页" ("next page") link
                print "no more pages"
                break
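
Like the first example, this fragment depends on names defined in the surrounding code (SPROV, famous_uids, time, browser). A self-contained sketch of the same flow — building the verified.weibo.com fame URL per letter, province and page, waiting for the category list to render, reading UIDs from the hidden inputs inside the detail div, and stopping when the pager no longer offers a "下一页" ("next page") link — could look like this; the function name collect_famous_uids, the province_codes parameter and the plain-list accumulator are assumptions.

from bs4 import BeautifulSoup
from splinter import Browser

FAME_URL = "http://verified.weibo.com/fame/%s/?rt=4&srt=3&province=%s&page=%d"

def collect_famous_uids(province_codes, max_pages=10):
    """Walk the verified.weibo.com fame listings and collect UIDs from hidden inputs."""
    uids = []
    with Browser() as browser:
        for letter in "abcdefghijklmnopqrstuvwxyz":  # one fame category per letter
            for prov in province_codes:
                for page in range(1, max_pages + 1):
                    browser.visit(FAME_URL % (letter, prov, page))
                    # keep reloading until the category list has rendered
                    while not browser.is_element_present_by_css(".categories_list .titlebar", wait_time=10):
                        browser.reload()
                    soup = BeautifulSoup(browser.html, "html.parser")
                    detail = soup.find("div", {"class": "detail"})
                    if detail is not None:
                        uids.extend(inp["value"] for inp in detail.find_all("input"))
                    pager = soup.find("div", {"class": "W_pages W_pages_comment"})
                    if pager is None or "下一页" not in str(pager):  # no "next page" link
                        break
    return uids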