# -*- coding: utf-8 -*-
# The encoding declaration above has to sit on line 1 or 2 of the file
# (PEP 263): the function below contains non-ASCII string literals, which
# Python 2 rejects when no encoding is declared.
def grab_tag_apps(tagurl, thetagid):
    """Crawl every page of a tag listing and pass the app links to appstore.

    Pages are requested as ``tagurl`` followed by a page number starting at
    1 (``'%s%d' % (tagurl, page)``).  On each page every ``<img class="icon">``
    is located and the ``href`` of its parent element is collected; the
    collected links are handed to ``appstore.get_appinfo_with_ids`` together
    with ``thetagid``.  The crawl stops at the first page that contains no
    such icon.

    Args:
        tagurl: URL prefix to which the page number is appended.
        thetagid: Value forwarded unchanged as the second argument of
            ``appstore.get_appinfo_with_ids``.

    Returns:
        None.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise for a failing request;
        network errors are not handled here.
    """
    page = 1
    while True:
        realtagurl = '%s%d' % (tagurl, page)
        print('开始抓取:' + realtagurl)

        # Close the HTTP response explicitly: one is opened per page and the
        # loop may run for a long time.
        response = urllib2.urlopen(realtagurl)
        try:
            html = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(html)

        # Icon images; their parent element is expected to carry the link.
        icons = soup.findAll('img', {'class': 'icon'})
        if not icons:
            # No icons on this page: treated as the end of the listing.
            print('没找到相关icon的li !')
            return

        # Collect the link wrapped around each icon.
        apphrefs = []
        for icon in icons:
            href = icon.parent.get('href')
            if href is None:
                # The icon is not wrapped in a link; skip it instead of
                # aborting the whole crawl with a KeyError.
                continue
            apphrefs.append(href)
            print('one href:')
            print(href)
        print('href个数:%d' % (len(apphrefs)))

        # Resolve all links of this page and write them to the database.
        if apphrefs:
            appstore.get_appinfo_with_ids(apphrefs, thetagid)

        # Next page.
        page += 1
# -*- coding: utf-8 -*-
import urllib2
import urlparse
from datetime import datetime

from BeautifulSoup import BeautifulSoup

import appstore

# Post pages to crawl: ids FIRST_POST_ID..LAST_POST_ID inclusive.
POST_URL_TEMPLATE = 'http://www.iosinspires.me/category/appicons/post/%d'
FIRST_POST_ID = 691
LAST_POST_ID = 896

# Second argument passed to appstore.get_appinfo_with_ids for every batch.
TAG_ID = 19

# Number of links handed to appstore in one call.
BATCH_SIZE = 10


def _read_url(url):
    """Return the body of ``url``, always closing the HTTP response."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _extract_itunes_link(html):
    """Return the href found in the ``li.itemActionItunes`` item, or None.

    The link is read from the second child (``contents[1]``) of the list
    item.  None is returned when the item is missing or that child does not
    carry an ``href``.
    """
    item = BeautifulSoup(html).find('li', {'class': 'itemActionItunes'})
    if item is None:
        return None
    try:
        return item.contents[1]['href']
    except (IndexError, KeyError, TypeError):
        # Too few children, a tag without href, or a text node.
        return None


def _store_batch(links):
    """Hand one batch of links to appstore, framed by progress banners."""
    print('------start-----')
    appstore.get_appinfo_with_ids(links, TAG_ID)
    print('------done------')


def main():
    """Crawl the configured post pages and store their links in batches.

    A page that cannot be fetched or that has no recognisable link is
    reported and skipped so that one bad page does not abort the crawl.
    """
    links = []
    for index in range(FIRST_POST_ID, LAST_POST_ID + 1):
        url = POST_URL_TEMPLATE % index
        try:
            html = _read_url(url)
        except urllib2.URLError as err:
            print('skipping post %d: %s' % (index, err))
            continue

        link = _extract_itunes_link(html)
        if link is None:
            print('skipping post %d: no itunes link found' % index)
            continue

        print('%d %s' % (index, link))
        links.append(link)

        # Flush on batch size rather than on the post id so that skipped
        # pages cannot make a flush point disappear.
        if len(links) >= BATCH_SIZE:
            _store_batch(links)
            links = []

    # Store whatever is left over from the last, partial batch.
    if links:
        _store_batch(links)


if __name__ == '__main__':
    main()