Пример #1
0
def sale_list_find():
    """Scrape every promotion in ``sale_list_link_new`` and save its books.

    Returns immediately when ``sale_list_link_new`` equals
    ``sale_list_link_old``. Otherwise each (promotion ID, list-page URL) pair
    is fetched; for every ``id="jinalou_book_right"`` entry that has a
    promotional price, the book's detail page is fetched, each usable vendor
    price on it is saved as a ``BookPriceList`` row, and the book itself is
    saved as a ``PromotionBookList`` row.

    After a complete run ``sale_list_link_old`` is set to the processed
    mapping and ``sale_list_link_new`` is reset to an empty dict.

    A ``urllib2.URLError`` stops the run: its ``reason`` is printed and the
    two globals keep their values (rows saved before the error stay saved).
    Any other exception propagates to the caller.
    """
    global sale_list_link_old
    global sale_list_link_new
    # Local import so the block brings its own dependency into scope.
    from contextlib import closing

    try:
        if sale_list_link_new == sale_list_link_old:
            return
        home = "http://www.queshu.com"
        isbn_pattern = re.compile(r"\d{13}")

        for promotionID, list_url in sale_list_link_new.items():
            # Search link for books inside this promotion.
            promotionBookSearchLink = list_url + "?c="

            # Close the HTTP response as soon as the page has been parsed.
            with closing(urllib2.urlopen(urllib2.Request(list_url))) as response:
                list_soup = BeautifulSoup(response, "html.parser")

            for entry in list_soup.find_all(id="jinalou_book_right"):
                price_tag = entry.find(class_="xianjia")
                if not price_tag:  # entry has no promotional price
                    continue
                promotionBookCurrentPrice = price_tag.string

                # List price: UTF-8 bytes starting 3 bytes past the first ":"
                # (presumably skipping the label and currency sign - confirm).
                list_price = entry.contents[3].contents[0].string.encode('utf-8')
                promotionBookPrice = list_price[list_price.find(":") + 3:]

                # Detail page of the promoted book.
                detail_url = home + entry.contents[0].contents[0]["href"]
                with closing(urllib2.urlopen(urllib2.Request(detail_url))) as response:
                    detail_soup = BeautifulSoup(response, "html.parser")

                # ISBN: last 13 characters of the first text node holding 13 digits.
                promotionBookISBN = detail_soup.find(text=isbn_pattern)[-13:]

                # Per-vendor price list on the detail page.
                for price_item in detail_soup.find_all(class_="price_item"):
                    site_tag = price_item.find(class_="book_site")
                    bookSaler = home + site_tag.contents[0].contents[0]["src"]
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # vendor row without a price
                        continue

                    bookCurrentPrice = book_price.contents[0]
                    # Skip irregular rows: only values whose repr starts with
                    # "u" are used (presumably a Python 2 unicode text node
                    # rather than a nested tag - confirm).
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string

                    BookPriceList(
                        bookISBN=promotionBookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice).save()

                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=promotionBookISBN,
                    promotionBookPrice=promotionBookPrice,
                    promotionBookCurrentPrice=promotionBookCurrentPrice,
                    promotionBookSearchLink=promotionBookSearchLink).save()

        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)
Пример #2
0
def sale_list_find():
    """Sync stored promotions with ``sale_list_link_new`` and scrape new ones.

    Promotion IDs present in ``sale_list_link_old`` but not in
    ``sale_list_link_new`` have their ``Promotion`` and ``PromotionID`` rows
    deleted. Promotion IDs present only in ``sale_list_link_new`` are
    scraped: for every ``id="jinalou_book_right"`` entry with a promotional
    price, the book's detail page is fetched, each usable vendor price is
    saved as a ``BookPriceList`` row, and the book is saved as a
    ``PromotionBookList`` row.

    After a complete run ``sale_list_link_old`` is set to the processed
    mapping and ``sale_list_link_new`` is reset to an empty dict.

    A ``urllib2.URLError`` stops the run: its ``reason`` is printed and the
    two globals keep their values (database changes made before the error
    stay in place). Any other exception propagates to the caller.
    """
    global sale_list_link_old
    global sale_list_link_new
    # Local import so the block brings its own dependency into scope.
    from contextlib import closing

    home = "http://www.queshu.com"
    isbn_pattern = re.compile(r"\d{13}")
    try:
        # Promotions that are no longer listed: remove their records.
        for stale_id in set(sale_list_link_old) - set(sale_list_link_new):
            Promotion.objects.filter(promotionID=stale_id).delete()
            PromotionID.objects.filter(promotionID=stale_id).delete()

        # Promotions that are newly listed: scrape and store their books.
        for promotionID in set(sale_list_link_new) - set(sale_list_link_old):
            list_url = sale_list_link_new[promotionID]
            # Search link for books inside this promotion.
            promotionBookSearchLink = list_url + "?c="

            # Close the HTTP response as soon as the page has been parsed.
            with closing(urllib2.urlopen(urllib2.Request(list_url))) as response:
                list_soup = BeautifulSoup(response, "html.parser")

            for entry in list_soup.find_all(id="jinalou_book_right"):
                price_tag = entry.find(class_="xianjia")
                if not price_tag:  # entry has no promotional price
                    continue
                promotionBookCurrentPrice = price_tag.string

                # List price: UTF-8 bytes starting 3 bytes past the first ":"
                # (presumably skipping the label and currency sign - confirm).
                list_price = entry.contents[3].contents[0].string.encode('utf-8')
                promotionBookPrice = list_price[list_price.find(":") + 3:]

                # Detail page of the promoted book.
                detail_url = home + entry.contents[0].contents[0]["href"]
                with closing(urllib2.urlopen(urllib2.Request(detail_url))) as response:
                    detail_soup = BeautifulSoup(response, "html.parser")

                # ISBN: last 13 characters of the first text node holding 13 digits.
                promotionBookISBN = detail_soup.find(text=isbn_pattern)[-13:]

                # Per-vendor price list on the detail page.
                for price_item in detail_soup.find_all(class_="price_item"):
                    site_tag = price_item.find(class_="book_site")
                    bookSaler = home + site_tag.contents[0].contents[0]["src"]
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # vendor row without a price
                        continue

                    bookCurrentPrice = book_price.contents[0]
                    # Skip irregular rows: only values whose repr starts with
                    # "u" are used (presumably a Python 2 unicode text node
                    # rather than a nested tag - confirm).
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string

                    BookPriceList(
                        bookISBN=promotionBookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice).save()

                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=promotionBookISBN,
                    promotionBookPrice=promotionBookPrice,
                    promotionBookCurrentPrice=promotionBookCurrentPrice,
                    promotionBookSearchLink=promotionBookSearchLink).save()

        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)
Пример #3
0
def sale_list_find():
    """Sync stored promotions with ``sale_list_link_new`` and scrape new ones.

    Promotion IDs present in ``sale_list_link_old`` but not in
    ``sale_list_link_new`` have their ``Promotion`` rows deleted. Promotion
    IDs present only in ``sale_list_link_new`` are scraped: for every
    ``id="jianlou_book"`` entry on the list page, the image link, name,
    detail-page link and price are read, the detail page is fetched, each
    usable vendor price is saved as a ``BookPriceList`` row, and the book is
    saved as a ``PromotionBookList`` row.

    After a complete run ``sale_list_link_old`` is set to the processed
    mapping and ``sale_list_link_new`` is reset to an empty dict.

    A ``urllib2.URLError`` stops the run: its ``reason`` is printed and the
    two globals keep their values (database changes made before the error
    stay in place). Any other exception propagates to the caller.
    """
    global sale_list_link_old
    global sale_list_link_new
    # Local import so the block brings its own dependency into scope.
    from contextlib import closing

    home = "http://www.queshu.com"
    isbn_pattern = re.compile(r"\d{13}")
    try:
        # Promotions that are no longer listed: remove their records.
        # NOTE(review): this Promotion delete used to be issued twice in a
        # row; the second one was probably meant for a different model -
        # confirm which records should also be removed here.
        for stale_id in set(sale_list_link_old) - set(sale_list_link_new):
            Promotion.objects.filter(promotionID=stale_id).delete()

        # Promotions that are newly listed: scrape and store their books.
        for promotionID in set(sale_list_link_new) - set(sale_list_link_old):
            list_url = sale_list_link_new[promotionID]  # promotion list page

            # Close the HTTP response as soon as the page has been parsed.
            with closing(urllib2.urlopen(urllib2.Request(list_url))) as response:
                list_soup = BeautifulSoup(response, "html.parser")

            for jianlou_book in list_soup.find_all(id="jianlou_book"):
                img120 = jianlou_book.find(class_="img120")
                bookImageLink = img120["src"]  # cover image link
                bookName = img120["alt"]  # book title
                info_lines = jianlou_book.find_all(class_="book_right_line")
                # Link to the book's detail page.
                bookDetailLink = home + info_lines[0].contents[0]["href"]
                # Price shown on the list page.
                bookPrice = info_lines[3].find(class_="xianjia").string

                with closing(urllib2.urlopen(urllib2.Request(bookDetailLink))) as response:
                    detail_soup = BeautifulSoup(response, "html.parser")

                book_info = detail_soup.find(class_="book_info")
                # ISBN: first text node in the info block holding 13 digits.
                bookISBN = book_info.find(text=isbn_pattern)

                # Per-vendor price list on the detail page.
                for price_item in detail_soup.find_all(class_="price_item"):
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # vendor row without a price
                        continue

                    book_site = price_item.find(class_="book_site")
                    bookLink = home + book_site.contents[0]["href"]
                    bookSaler = home + book_site.contents[0].contents[0]["src"]
                    bookCurrentPrice = book_price.contents[0]
                    # Skip irregular rows: only values whose repr starts with
                    # "u" are used (presumably a Python 2 unicode text node
                    # rather than a nested tag - confirm).
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string

                    BookPriceList(
                        bookISBN=bookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice,
                        bookLink=bookLink).save()

                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=bookISBN,
                    promotionBookName=bookName,
                    promotionBookImageLink=bookImageLink,
                    promotionBookPrice=bookPrice).save()

        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)
Пример #4
0
def sale_list_find():
    """Sync stored promotions with ``sale_list_link_new`` and scrape new ones.

    Promotion IDs present in ``sale_list_link_old`` but not in
    ``sale_list_link_new`` have their ``Promotion``, ``PromotionBookList``
    and ``BookPriceList`` rows deleted (each filtered by ``promotionID``).
    Promotion IDs present only in ``sale_list_link_new`` are scraped: for
    every ``id="jianlou_book"`` entry on the list page, the image link, name,
    detail-page link and price are read, the detail page is fetched, each
    usable vendor price is saved as a ``BookPriceList`` row, and the book is
    saved as a ``PromotionBookList`` row.

    After a complete run ``sale_list_link_old`` is set to the processed
    mapping and ``sale_list_link_new`` is reset to an empty dict.

    A ``urllib2.URLError`` stops the run: its ``reason`` is printed and the
    two globals keep their values (database changes made before the error
    stay in place). Any other exception propagates to the caller.
    """
    global sale_list_link_old
    global sale_list_link_new
    # Local import so the block brings its own dependency into scope.
    from contextlib import closing

    home = "http://www.queshu.com"
    isbn_pattern = re.compile(r"\d{13}")
    try:
        # Promotions that are no longer listed: remove the promotion and the
        # records stored for it.
        # FIX: replace these manual deletes with a foreign-key constraint.
        for stale_id in set(sale_list_link_old) - set(sale_list_link_new):
            Promotion.objects.filter(promotionID=stale_id).delete()
            PromotionBookList.objects.filter(promotionID=stale_id).delete()
            # NOTE(review): the BookPriceList rows created below are never
            # given a promotionID - confirm the model has that field.
            BookPriceList.objects.filter(promotionID=stale_id).delete()

        # Promotions that are newly listed: scrape and store their books.
        for promotionID in set(sale_list_link_new) - set(sale_list_link_old):
            list_url = sale_list_link_new[promotionID]  # promotion list page

            # Close the HTTP response as soon as the page has been parsed.
            with closing(urllib2.urlopen(urllib2.Request(list_url))) as response:
                list_soup = BeautifulSoup(response, "html.parser")

            for jianlou_book in list_soup.find_all(id="jianlou_book"):
                img120 = jianlou_book.find(class_="img120")
                bookImageLink = img120["src"]  # cover image link
                bookName = img120["alt"]  # book title
                info_lines = jianlou_book.find_all(class_="book_right_line")
                # Link to the book's detail page.
                bookDetailLink = home + info_lines[0].contents[0]["href"]
                # Price shown on the list page.
                bookPrice = info_lines[3].find(class_="xianjia").string

                with closing(urllib2.urlopen(urllib2.Request(bookDetailLink))) as response:
                    detail_soup = BeautifulSoup(response, "html.parser")

                book_info = detail_soup.find(class_="book_info")
                # ISBN: first text node in the info block holding 13 digits.
                bookISBN = book_info.find(text=isbn_pattern)

                # Per-vendor price list on the detail page.
                for price_item in detail_soup.find_all(class_="price_item"):
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # vendor row without a price
                        continue

                    book_site = price_item.find(class_="book_site")
                    bookLink = home + book_site.contents[0]["href"]
                    bookSaler = home + book_site.contents[0].contents[0]["src"]
                    bookCurrentPrice = book_price.contents[0]
                    # Skip irregular rows: only values whose repr starts with
                    # "u" are used (presumably a Python 2 unicode text node
                    # rather than a nested tag - confirm).
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string

                    BookPriceList(
                        bookISBN=bookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice,
                        bookLink=bookLink).save()

                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=bookISBN,
                    promotionBookName=bookName,
                    promotionBookImageLink=bookImageLink,
                    promotionBookPrice=bookPrice).save()

        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)