def sale_list_find():
    """Crawl every promotion's book list and store the scraped prices.

    Reads the module-level dict ``sale_list_link_new`` (promotion ID ->
    promotion list-page URL). If it equals ``sale_list_link_old`` the function
    returns immediately. Otherwise, for every promotion the list page is
    fetched, each book entry on it is followed to its detail page, and

    * one ``BookPriceList`` row is saved per usable seller price, and
    * one ``PromotionBookList`` row is saved per book.

    When the crawl completes, ``sale_list_link_old`` is rebound to the dict
    that was just processed and ``sale_list_link_new`` is reset to ``{}``.

    A ``urllib2.URLError`` raised by any fetch is caught and its ``reason`` is
    printed; the two globals are then left untouched and nothing already
    saved is rolled back by this function.

    Returns:
        None.
    """
    global sale_list_link_old
    global sale_list_link_new
    try:
        if sale_list_link_new == sale_list_link_old:
            return
        home = "http://www.queshu.com"
        isbn_pattern = re.compile(r"\d{13}")
        for promotionID, promotion_link in sale_list_link_new.items():
            # Search link stored with every book of this promotion.
            promotionBookSearchLink = promotion_link + "?c="
            list_response = urllib2.urlopen(urllib2.Request(promotion_link))
            list_soup = BeautifulSoup(list_response, "html.parser")
            # NOTE(review): the id is spelled "jinalou" here - confirm it
            # matches the page markup.
            for book_entry in list_soup.find_all(id="jinalou_book_right"):
                # Promotional price; the element may be absent.
                price_tag = book_entry.find(class_="xianjia")
                if not price_tag:
                    continue
                promotionBookCurrentPrice = price_tag.string
                # List price: keep what follows the first ":" plus two more
                # bytes of the UTF-8 encoded label (presumably a multi-byte
                # currency sign - TODO confirm).
                list_price = book_entry.contents[3].contents[0].string.encode('utf-8')
                promotionBookPrice = list_price[list_price.find(":") + 3:]
                # Book detail page.
                detail_link = home + book_entry.contents[0].contents[0]["href"]
                detail_response = urllib2.urlopen(urllib2.Request(detail_link))
                detail_soup = BeautifulSoup(detail_response, "html.parser")
                # ISBN: last 13 characters of the first text node that
                # contains 13 consecutive digits. find() returns None when no
                # such node exists; skip the book instead of crashing.
                isbn_text = detail_soup.find(text=isbn_pattern)
                if isbn_text is None:
                    continue
                promotionBookISBN = isbn_text[-13:]
                # Per-seller price list.
                for price_item in detail_soup.find_all(class_="price_item"):
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # price element may be absent
                        continue
                    bookCurrentPrice = book_price.contents[0]
                    # Only a Python 2 unicode text node has a repr starting
                    # with 'u'; anything else is irregular data and skipped.
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string
                    # The seller is identified by the URL of its logo image;
                    # looked up only for rows that are actually stored.
                    site_logo = price_item.find(class_="book_site").contents[0].contents[0]
                    bookSaler = home + site_logo["src"]
                    BookPriceList(
                        bookISBN=promotionBookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice,
                    ).save()
                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=promotionBookISBN,
                    promotionBookPrice=promotionBookPrice,
                    promotionBookCurrentPrice=promotionBookCurrentPrice,
                    promotionBookSearchLink=promotionBookSearchLink,
                ).save()
        # Hand the processed promotions over and clear the pending dict.
        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)
def sale_list_find():
    """Synchronise stored promotion books with the current promotion links.

    Compares the keys (promotion IDs) of the module-level dicts
    ``sale_list_link_old`` and ``sale_list_link_new``:

    * IDs only in the old dict have their ``Promotion`` and ``PromotionID``
      rows deleted.
    * IDs only in the new dict are crawled: the promotion list page is
      fetched, each book entry is followed to its detail page, one
      ``BookPriceList`` row is saved per usable seller price and one
      ``PromotionBookList`` row per book.

    When the crawl completes, ``sale_list_link_old`` is rebound to the new
    dict and ``sale_list_link_new`` is reset to ``{}``.

    A ``urllib2.URLError`` raised by any fetch is caught and its ``reason`` is
    printed; the two globals are then left untouched and nothing already
    saved or deleted is rolled back by this function.

    Returns:
        None.
    """
    global sale_list_link_old
    global sale_list_link_new
    home = "http://www.queshu.com"
    try:
        old_ids = set(sale_list_link_old)
        new_ids = set(sale_list_link_new)

        # Drop promotions that are no longer listed.
        for stale_id in old_ids - new_ids:
            Promotion.objects.filter(promotionID=stale_id).delete()
            # NOTE(review): PromotionID is used as a model class here -
            # confirm it exists. PromotionBookList rows of the promotion are
            # not removed by this function.
            PromotionID.objects.filter(promotionID=stale_id).delete()

        # Crawl promotions that were not known before.
        isbn_pattern = re.compile(r"\d{13}")
        for promotionID in new_ids - old_ids:
            sale_list_link = sale_list_link_new[promotionID]
            # Search link stored with every book of this promotion.
            promotionBookSearchLink = sale_list_link + "?c="
            list_response = urllib2.urlopen(urllib2.Request(sale_list_link))
            list_soup = BeautifulSoup(list_response, "html.parser")
            # NOTE(review): the id is spelled "jinalou" here - confirm it
            # matches the page markup.
            for book_entry in list_soup.find_all(id="jinalou_book_right"):
                # Promotional price; the element may be absent.
                price_tag = book_entry.find(class_="xianjia")
                if not price_tag:
                    continue
                promotionBookCurrentPrice = price_tag.string
                # List price: keep what follows the first ":" plus two more
                # bytes of the UTF-8 encoded label (presumably a multi-byte
                # currency sign - TODO confirm).
                list_price = book_entry.contents[3].contents[0].string.encode('utf-8')
                promotionBookPrice = list_price[list_price.find(":") + 3:]
                # Book detail page.
                detail_link = home + book_entry.contents[0].contents[0]["href"]
                detail_response = urllib2.urlopen(urllib2.Request(detail_link))
                detail_soup = BeautifulSoup(detail_response, "html.parser")
                # ISBN: last 13 characters of the first text node that
                # contains 13 consecutive digits. find() returns None when no
                # such node exists; skip the book instead of crashing.
                isbn_text = detail_soup.find(text=isbn_pattern)
                if isbn_text is None:
                    continue
                promotionBookISBN = isbn_text[-13:]
                # Per-seller price list.
                for price_item in detail_soup.find_all(class_="price_item"):
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # price element may be absent
                        continue
                    bookCurrentPrice = book_price.contents[0]
                    # Only a Python 2 unicode text node has a repr starting
                    # with 'u'; anything else is irregular data and skipped.
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string
                    # The seller is identified by the URL of its logo image;
                    # looked up only for rows that are actually stored.
                    site_logo = price_item.find(class_="book_site").contents[0].contents[0]
                    bookSaler = home + site_logo["src"]
                    BookPriceList(
                        bookISBN=promotionBookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice,
                    ).save()
                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=promotionBookISBN,
                    promotionBookPrice=promotionBookPrice,
                    promotionBookCurrentPrice=promotionBookCurrentPrice,
                    promotionBookSearchLink=promotionBookSearchLink,
                ).save()

        # Hand the processed promotions over and clear the pending dict.
        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)
def sale_list_find():
    """Synchronise stored promotion books with the current promotion links.

    Compares the keys (promotion IDs) of the module-level dicts
    ``sale_list_link_old`` and ``sale_list_link_new``:

    * IDs only in the old dict have their ``Promotion`` rows and their
      ``PromotionBookList`` rows deleted.
    * IDs only in the new dict are crawled: the promotion list page is
      fetched, each book entry is followed to its detail page, one
      ``BookPriceList`` row is saved per usable seller price and one
      ``PromotionBookList`` row per book.

    When the crawl completes, ``sale_list_link_old`` is rebound to the new
    dict and ``sale_list_link_new`` is reset to ``{}``.

    A ``urllib2.URLError`` raised by any fetch is caught and its ``reason`` is
    printed; the two globals are then left untouched and nothing already
    saved or deleted is rolled back by this function.

    Returns:
        None.
    """
    global sale_list_link_old
    global sale_list_link_new
    home = "http://www.queshu.com"
    try:
        old_ids = set(sale_list_link_old)
        new_ids = set(sale_list_link_new)

        # Drop promotions that are no longer listed, together with the book
        # rows this function stored for them. (The second delete previously
        # repeated the Promotion delete, which removed nothing further.)
        for stale_id in old_ids - new_ids:
            Promotion.objects.filter(promotionID=stale_id).delete()
            PromotionBookList.objects.filter(promotionID=stale_id).delete()

        # Crawl promotions that were not known before.
        isbn_pattern = re.compile(r"\d{13}")
        for promotionID in new_ids - old_ids:
            sale_list_link = sale_list_link_new[promotionID]  # promotion list page
            list_response = urllib2.urlopen(urllib2.Request(sale_list_link))
            list_soup = BeautifulSoup(list_response, "html.parser")
            for jianlou_book in list_soup.find_all(id="jianlou_book"):
                img120 = jianlou_book.find(class_="img120")
                bookImageLink = img120["src"]  # cover image URL
                bookName = img120["alt"]       # book title
                book_right_line = jianlou_book.find_all(class_="book_right_line")
                # First info line links to the detail page; the fourth one
                # holds the price element.
                bookDetailLink = home + book_right_line[0].contents[0]["href"]
                bookPrice = book_right_line[3].find(class_="xianjia").string
                detail_response = urllib2.urlopen(urllib2.Request(bookDetailLink))
                detail_soup = BeautifulSoup(detail_response, "html.parser")
                # ISBN: the first text node inside "book_info" containing 13
                # consecutive digits, stored as-is. find() returns None when
                # nothing matches; skip the book rather than raising or
                # saving rows without an ISBN.
                book_info = detail_soup.find(class_="book_info")
                bookISBN = None
                if book_info is not None:
                    bookISBN = book_info.find(text=isbn_pattern)
                if bookISBN is None:
                    continue
                # Per-seller price list.
                for price_item in detail_soup.find_all(class_="price_item"):
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # price element may be absent
                        continue
                    book_site = price_item.find(class_="book_site")
                    bookLink = home + book_site.contents[0]["href"]
                    # The seller is identified by the URL of its logo image.
                    bookSaler = home + book_site.contents[0].contents[0]["src"]
                    bookCurrentPrice = book_price.contents[0]
                    # Only a Python 2 unicode text node has a repr starting
                    # with 'u'; anything else is irregular data and skipped.
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string
                    BookPriceList(
                        bookISBN=bookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice,
                        bookLink=bookLink,
                    ).save()
                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=bookISBN,
                    promotionBookName=bookName,
                    promotionBookImageLink=bookImageLink,
                    promotionBookPrice=bookPrice,
                ).save()

        # Hand the processed promotions over and clear the pending dict.
        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)
def sale_list_find():
    """Synchronise stored promotion books with the current promotion links.

    Compares the keys (promotion IDs) of the module-level dicts
    ``sale_list_link_old`` and ``sale_list_link_new``:

    * IDs only in the old dict have their rows deleted from ``Promotion``,
      ``PromotionBookList`` and ``BookPriceList`` (each filtered by
      ``promotionID``).
    * IDs only in the new dict are crawled: the promotion list page is
      fetched, each book entry is followed to its detail page, one
      ``BookPriceList`` row is saved per usable seller price and one
      ``PromotionBookList`` row per book.

    When the crawl completes, ``sale_list_link_old`` is rebound to the new
    dict and ``sale_list_link_new`` is reset to ``{}``.

    A ``urllib2.URLError`` raised by any fetch is caught and its ``reason`` is
    printed; the two globals are then left untouched and nothing already
    saved or deleted is rolled back by this function.

    Returns:
        None.
    """
    global sale_list_link_old
    global sale_list_link_new
    home = "http://www.queshu.com"
    try:
        old_ids = set(sale_list_link_old)
        new_ids = set(sale_list_link_new)

        # Remove stale promotions and the records stored for them.
        # TODO: replace these manual deletes with a foreign-key constraint.
        for stale_id in old_ids - new_ids:
            Promotion.objects.filter(promotionID=stale_id).delete()
            PromotionBookList.objects.filter(promotionID=stale_id).delete()
            # NOTE(review): BookPriceList rows are created below without a
            # promotionID - confirm the model has that field, otherwise this
            # filter cannot work.
            BookPriceList.objects.filter(promotionID=stale_id).delete()

        # Crawl promotions that were not known before.
        isbn_pattern = re.compile(r"\d{13}")
        for promotionID in new_ids - old_ids:
            sale_list_link = sale_list_link_new[promotionID]  # promotion list page
            list_response = urllib2.urlopen(urllib2.Request(sale_list_link))
            list_soup = BeautifulSoup(list_response, "html.parser")
            for jianlou_book in list_soup.find_all(id="jianlou_book"):
                img120 = jianlou_book.find(class_="img120")
                bookImageLink = img120["src"]  # cover image URL
                bookName = img120["alt"]       # book title
                book_right_line = jianlou_book.find_all(class_="book_right_line")
                # First info line links to the detail page; the fourth one
                # holds the price element.
                bookDetailLink = home + book_right_line[0].contents[0]["href"]
                bookPrice = book_right_line[3].find(class_="xianjia").string
                detail_response = urllib2.urlopen(urllib2.Request(bookDetailLink))
                detail_soup = BeautifulSoup(detail_response, "html.parser")
                # ISBN: the first text node inside "book_info" containing 13
                # consecutive digits, stored as-is. find() returns None when
                # nothing matches; skip the book rather than raising or
                # saving rows without an ISBN.
                book_info = detail_soup.find(class_="book_info")
                bookISBN = None
                if book_info is not None:
                    bookISBN = book_info.find(text=isbn_pattern)
                if bookISBN is None:
                    continue
                # Per-seller price list.
                for price_item in detail_soup.find_all(class_="price_item"):
                    book_price = price_item.find(class_="book_price_price")
                    if not book_price:  # price element may be absent
                        continue
                    book_site = price_item.find(class_="book_site")
                    bookLink = home + book_site.contents[0]["href"]
                    # The seller is identified by the URL of its logo image.
                    bookSaler = home + book_site.contents[0].contents[0]["src"]
                    bookCurrentPrice = book_price.contents[0]
                    # Only a Python 2 unicode text node has a repr starting
                    # with 'u'; anything else is irregular data and skipped.
                    if repr(bookCurrentPrice)[0] != 'u':
                        continue
                    bookCurrentPrice = bookCurrentPrice + book_price.contents[1].string
                    BookPriceList(
                        bookISBN=bookISBN,
                        bookSaler=bookSaler,
                        bookCurrentPrice=bookCurrentPrice,
                        bookLink=bookLink,
                    ).save()
                PromotionBookList(
                    promotionID=promotionID,
                    promotionBookISBN=bookISBN,
                    promotionBookName=bookName,
                    promotionBookImageLink=bookImageLink,
                    promotionBookPrice=bookPrice,
                ).save()

        # Hand the processed promotions over and clear the pending dict.
        sale_list_link_old = sale_list_link_new
        sale_list_link_new = {}
    except urllib2.URLError as e:
        print(e.reason)