Пример #1
0
def sale_list_find():
    """Synchronise stored promotion books with the latest promotion links.

    Works on the module-level ``sale_list_link_old`` and
    ``sale_list_link_new`` collections, which are keyed by promotion ID;
    ``sale_list_link_new[promotion_id]`` is the URL of that promotion's
    book-list page.

    * Promotions found only in the old collection have their ``Promotion``,
      ``PromotionBookList`` and ``BookPriceList`` rows deleted.
    * Promotions found only in the new collection are scraped: each
      ``jianlou_book`` entry on the list page produces one
      ``PromotionBookList`` row, and each usable ``price_item`` on the
      book's detail page produces one ``BookPriceList`` row.

    When everything succeeds, the new collection becomes the old one and
    ``sale_list_link_new`` is reset to an empty dict.  A ``urllib2.URLError``
    stops the run, its reason is printed, and both globals are left as they
    were.
    """
    global sale_list_link_old
    global sale_list_link_new

    # Function-scope import: the file's import block is not visible from here.
    from contextlib import closing

    home = "http://www.queshu.com"
    # Unicode text type. The original detected text nodes by checking that
    # repr() started with 'u'; an isinstance check states the same intent.
    text_type = type(u"")
    isbn_pattern = re.compile(r"\d{13}")  # compiled once, reused per book

    def fetch_soup(url):
        # Download and parse one page, always releasing the HTTP response.
        with closing(urllib2.urlopen(urllib2.Request(url))) as response:
            return BeautifulSoup(response, "html.parser")

    try:
        old_ids = set(sale_list_link_old)
        new_ids = set(sale_list_link_new)

        # Remove every record that belongs to a promotion no longer listed.
        # TODO (original author): replace these manual deletes with
        # foreign-key constraints.
        for promotion_id in old_ids - new_ids:
            Promotion.objects.filter(promotionID=promotion_id).delete()
            PromotionBookList.objects.filter(promotionID=promotion_id).delete()
            # NOTE(review): BookPriceList rows are created below without a
            # promotionID, so this filter may never match them -- confirm
            # against the model definition.
            BookPriceList.objects.filter(promotionID=promotion_id).delete()

        # Scrape and store the books of every newly listed promotion.
        for promotion_id in new_ids - old_ids:
            # Promotion book-list page.
            list_soup = fetch_soup(sale_list_link_new[promotion_id])

            for entry in list_soup.find_all(id="jianlou_book"):
                cover = entry.find(class_="img120")
                book_image_link = cover["src"]  # book cover image URL
                book_name = cover["alt"]  # book title
                info_lines = entry.find_all(class_="book_right_line")
                # Link to the book's detail page (site-relative href).
                book_detail_link = home + info_lines[0].contents[0]["href"]
                # Promotion price shown on the list page.
                book_price = info_lines[3].find(class_="xianjia").string

                # A separate name keeps the list page intact while the
                # detail page is parsed.
                detail_soup = fetch_soup(book_detail_link)
                book_info = detail_soup.find(class_="book_info")
                # First text node containing 13 consecutive digits (ISBN).
                book_isbn = book_info.find(text=isbn_pattern)

                # Per-seller price list on the detail page.
                for price_item in detail_soup.find_all(class_="price_item"):
                    price_tag = price_item.find(class_="book_price_price")
                    if not price_tag:  # some items carry no price element
                        continue

                    site_tag = price_item.find(class_="book_site")
                    book_link = home + site_tag.contents[0]["href"]
                    book_saler = home + site_tag.contents[0].contents[0]["src"]
                    current_price = price_tag.contents[0]
                    if not isinstance(current_price, text_type):
                        # Irregular markup: the first child is not plain text.
                        continue
                    current_price = current_price + price_tag.contents[1].string

                    BookPriceList(
                        bookISBN=book_isbn,
                        bookSaler=book_saler,
                        bookCurrentPrice=current_price,
                        bookLink=book_link,
                    ).save()

                PromotionBookList(
                    promotionID=promotion_id,
                    promotionBookISBN=book_isbn,
                    promotionBookName=book_name,
                    promotionBookImageLink=book_image_link,
                    promotionBookPrice=book_price,
                ).save()

        # Everything succeeded: the new snapshot becomes the baseline.
        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        # NOTE(review): rows saved before the failure stay in the database
        # while both globals are unchanged, so the next run re-scrapes the
        # same promotions and may insert duplicates.
        print(e.reason)