Пример #1
0
				break
			r3="http://www.homeshop18.com"+r3
			var4=request_passable(r3)
			if not var4:
				log.write(str(datetime.now())+":{0} skipped").format(r3)
				continue
			soup4=BeautifulSoup(var4.text)
			con =psycopg2.connect(database='compare', user='******', password='******')
			cur=con.cursor()
			i1=soup4.find('table',{'class':'more-detail-tb'})
			i2=(pattern2.findall(i1.text))[0]
			if i2 is None:
				i2=(pattern3.findall(i1.text))[0]
				if i2 is None:
					continue
				i2=isbncon.convert(str(i2))	
			isbn13=str(i2)
			d1=soup4.find('div',{'id':'product-info'},{'class':'product-info'})
			title=d1.h1.span.text
			d2=soup4.find('div',{'class':'costs'})
			if d2.h3.span is None:
				continue
			cost=int((pattern4.findall(d2.h3.span.text))[0])
			d3=soup4.find('table',{'class':'more-detail-tb'})
			d4=d3.find('a',itemprop="author")
			if d4 is None:
				author=''
			else:
				author=d4.text
			d6=soup4.find('div',{"class":"productMeduimImage clearfix"})
			if d6.img is None:
Пример #2
0
					full_link = str(base_link) + str(book_link)
					full_link=str(full_link)
					i += 1
					f2.write(full_link+'\n')
					book_details=request_passable(full_link)
					if not book_details:
						log.write(str(datetime.now())+":{0} skipped").format(full_link)
						continue
					detail_soup = BeautifulSoup(book_details.text)
					isbn = (detail_soup.find('span',itemprop="isbn"))
					if isbn is None:
						find1=detail_soup.find('div',{"class":"bksfdpltrArea"})
						if find1.ul.li.span.h2 is None:
							continue
						isbn=find1.ul.li.span.h2.text
						isbn13=isbncon.convert(str(isbn))
					else:
						isbn13 = str((isbn_re.findall(isbn.text))[0])
					print isbn13
					book_name = str((detail_soup.find('span',itemprop="name")).text)
					book_name = ' '.join((book_name).split())
					print book_name
					#book_name = whitespace_re.sub("",book_name)
					author = detail_soup.find('a',{"class":"skuAuthorName"})
					if author:
						author = str(author.text)
					else:
						author = ""
					print author
					book_prices = str((detail_soup.find('span',itemprop="price")).text)
					if (detail_soup.find('span',itemprop="price")) is None:
Пример #3
0
 i += 1
 f2.write(full_link + '\n')
 book_details = request_passable(full_link)
 if not book_details:
     log.write(str(datetime.now()) +
               ":{0} skipped").format(full_link)
     continue
 detail_soup = BeautifulSoup(book_details.text)
 isbn = (detail_soup.find('span', itemprop="isbn"))
 if isbn is None:
     find1 = detail_soup.find('div',
                              {"class": "bksfdpltrArea"})
     if find1.ul.li.span.h2 is None:
         continue
     isbn = find1.ul.li.span.h2.text
     isbn13 = isbncon.convert(str(isbn))
 else:
     isbn13 = str((isbn_re.findall(isbn.text))[0])
 print isbn13
 book_name = str((detail_soup.find('span',
                                   itemprop="name")).text)
 book_name = ' '.join((book_name).split())
 print book_name
 #book_name = whitespace_re.sub("",book_name)
 author = detail_soup.find('a', {"class": "skuAuthorName"})
 if author:
     author = str(author.text)
 else:
     author = ""
 print author
 book_prices = str(
Пример #4
0
								if find4 is None:
									continue
							cost=((find4.text).split(" "))[3]
							cost=cost.replace(",","")
							print cost
							find5=soup3.find('span',{"class":"ctbr-name"})
							if find5 is None:
								author=''
							else:
								author=find5.text
							print(author)
							find6=soup3.find('div',{"id":"features"},{"class":"clearfix"})
							if find6 is None:
								continue
							isbn=(pattern2.findall(find6.text))[0]
							isbn=isbncon.convert(str(isbn))
							print isbn
							cur.execute("select * from books_bookdata where isbn=%(isbn)s",{"isbn":isbn})
							row=cur.fetchone()
							if row is None:
								cur.execute("insert into books_bookdata(isbn,book_name,author_name,crossword_price,crossword_booklink) values(%s,%s,%s,%s,%s)",(isbn,title,author,cost,url))
							elif (row[2] is None and not(author is None)):
								cur.execute("update books_bookdata set author_name=%s where isbn=%s",(author,isbn))
								cur.execute("update books_bookdata set crossword_price=%s, crossword_booklink=%s where isbn=%s",(cost,url,isbn))
							else:
								cur.execute("update books_bookdata set crossword_price=%s, crossword_booklink=%s where isbn=%s",(cost,url,isbn))
							con.commit()    
				except Exception, e:
					traceback.print_exc()
					log.write(str(datetime.now())+":"+traceback.format_exc()+":"+url)
					continue		
Пример #5
0
         continue
 cost = ((find4.text).split(" "))[3]
 cost = cost.replace(",", "")
 #Debug:print cost
 find5 = soup3.find('span', {"class": "ctbr-name"})
 if find5 is None:
     author = ''
 else:
     author = find5.text
 #Debug:print(author)
 find6 = soup3.find('div', {"id": "features"},
                    {"class": "clearfix"})
 if find6 is None:
     continue
 isbn = (pattern_isbn.findall(find6.text))[0]
 isbn = isbncon.convert(str(isbn))
 #Debug: print isbn
 cur.execute(
     "select * from books_bookdata where isbn=%(isbn)s",
     {"isbn": isbn})
 row = cur.fetchone()
 if row is None:
     cur.execute(
         "insert into books_bookdata(isbn,book_name,author_name,crossword_price,crossword_booklink) values(%s,%s,%s,%s,%s)",
         (isbn, title, author, cost, url))
 elif (row[2] is None and not (author is None)):
     cur.execute(
         "update books_bookdata set author_name=%s where isbn=%s",
         (author, isbn))
     cur.execute(
         "update books_bookdata set crossword_price=%s, crossword_booklink=%s where isbn=%s",