# Fragment of a scraper loop body (collapsed onto one physical line; it starts
# after an unseen statement and ends on an `if` whose body is not visible).
# Visible steps, in order:
#   * prefix r3 with "http://www.homeshop18.com" and fetch it via request_passable;
#     on a falsy response, write a "skipped" entry to `log` and `continue`
#   * parse the response text with BeautifulSoup and open a psycopg2
#     connection/cursor on database 'compare'
#   * ISBN: first pattern2 match in the text of table.more-detail-tb (pattern3 is
#     the intended fallback), passed through isbncon.convert and stored in isbn13
#   * title: div#product-info -> h1 -> span text
#   * cost: first pattern4 match in div.costs -> h3 -> span text, as int
#     (the item is skipped when that span is missing)
#   * author: text of a[itemprop="author"] inside table.more-detail-tb, '' if absent
#   * d6: div with class "productMeduimImage clearfix" (used past the end of this view)
# NOTE(review): `log.write(... + ":{0} skipped").format(r3)` calls .format on the
#   return value of log.write, not on the message. If `log` is an ordinary file
#   object this writes the literal "{0}" and then raises AttributeError on None;
#   the intent was presumably log.write((...).format(r3)) - confirm.
# NOTE(review): `findall(...)[0]` raises IndexError when there is no match, so it
#   never yields None; the two `if i2 is None` checks (and the pattern3 fallback)
#   look unreachable - confirm pattern2/pattern3 are compiled `re` patterns.
# NOTE(review): soup4.find('div', {'id': ...}, {'class': ...}) passes a second
#   dict positionally; in BeautifulSoup's find() the third positional parameter
#   is `recursive`, so the class filter is presumably not applied - verify.
# NOTE(review): i1, d1, d2 and d6 are dereferenced without checking whether
#   find() returned None; d3 repeats the same lookup as i1.
# NOTE(review): the connection appears to be opened once per iteration and no
#   close() is visible in this fragment - check the surrounding code.
break r3="http://www.homeshop18.com"+r3 var4=request_passable(r3) if not var4: log.write(str(datetime.now())+":{0} skipped").format(r3) continue soup4=BeautifulSoup(var4.text) con =psycopg2.connect(database='compare', user='******', password='******') cur=con.cursor() i1=soup4.find('table',{'class':'more-detail-tb'}) i2=(pattern2.findall(i1.text))[0] if i2 is None: i2=(pattern3.findall(i1.text))[0] if i2 is None: continue i2=isbncon.convert(str(i2)) isbn13=str(i2) d1=soup4.find('div',{'id':'product-info'},{'class':'product-info'}) title=d1.h1.span.text d2=soup4.find('div',{'class':'costs'}) if d2.h3.span is None: continue cost=int((pattern4.findall(d2.h3.span.text))[0]) d3=soup4.find('table',{'class':'more-detail-tb'}) d4=d3.find('a',itemprop="author") if d4 is None: author='' else: author=d4.text d6=soup4.find('div',{"class":"productMeduimImage clearfix"}) if d6.img is None:
# Fragment of a scraper loop body (collapsed onto one physical line; it ends on
# an `if` whose body is not visible). Visible steps, in order:
#   * build full_link from base_link + book_link, bump counter i, append the
#     link to f2
#   * fetch the page via request_passable; on a falsy response, write a
#     "skipped" entry to `log` and `continue`
#   * ISBN: first isbn_re match in span[itemprop="isbn"]; when that span is
#     missing, fall back to div.bksfdpltrArea -> ul -> li -> span -> h2 text
#     passed through isbncon.convert (the item is skipped if that h2 is missing)
#   * book_name: span[itemprop="name"] text with runs of whitespace collapsed
#     to single spaces
#   * author: text of a.skuAuthorName, "" when absent
#   * book_prices: text of span[itemprop="price"]
# NOTE(review): `log.write(... + ":{0} skipped").format(full_link)` calls .format
#   on the return value of log.write, not on the message. If `log` is an ordinary
#   file object this writes the literal "{0}" and then raises AttributeError on
#   None; the intent was presumably log.write((...).format(full_link)) - confirm.
# NOTE(review): book_prices dereferences `.text` on the price span BEFORE the
#   trailing `if ... is None:` check, so that check cannot guard the access.
# NOTE(review): find1 and the name span are dereferenced without a None check,
#   and `isbn_re.findall(...)[0]` raises IndexError when nothing matches.
# NOTE(review): the print statements make this Python 2; str() on scraped text
#   presumably raises UnicodeEncodeError for non-ASCII titles/authors - verify.
# NOTE(review): as stored here on a single physical line, the commented-out
#   `#book_name = whitespace_re.sub(...)` remnant turns everything after it on
#   this line into a comment.
full_link = str(base_link) + str(book_link) full_link=str(full_link) i += 1 f2.write(full_link+'\n') book_details=request_passable(full_link) if not book_details: log.write(str(datetime.now())+":{0} skipped").format(full_link) continue detail_soup = BeautifulSoup(book_details.text) isbn = (detail_soup.find('span',itemprop="isbn")) if isbn is None: find1=detail_soup.find('div',{"class":"bksfdpltrArea"}) if find1.ul.li.span.h2 is None: continue isbn=find1.ul.li.span.h2.text isbn13=isbncon.convert(str(isbn)) else: isbn13 = str((isbn_re.findall(isbn.text))[0]) print isbn13 book_name = str((detail_soup.find('span',itemprop="name")).text) book_name = ' '.join((book_name).split()) print book_name #book_name = whitespace_re.sub("",book_name) author = detail_soup.find('a',{"class":"skuAuthorName"}) if author: author = str(author.text) else: author = "" print author book_prices = str((detail_soup.find('span',itemprop="price")).text) if (detail_soup.find('span',itemprop="price")) is None:
# Fragment of a scraper loop body (collapsed onto one physical line; it starts
# after full_link has been built elsewhere and is cut off inside `str(`).
# Visible steps, in order:
#   * bump counter i and append full_link to f2
#   * fetch the page via request_passable; on a falsy response, write a
#     "skipped" entry to `log` and `continue`
#   * ISBN: first isbn_re match in span[itemprop="isbn"]; when that span is
#     missing, fall back to div.bksfdpltrArea -> ul -> li -> span -> h2 text
#     passed through isbncon.convert (the item is skipped if that h2 is missing)
#   * book_name: span[itemprop="name"] text with runs of whitespace collapsed
#     to single spaces
#   * author: text of a.skuAuthorName, "" when absent
# NOTE(review): `log.write(... + ":{0} skipped").format(full_link)` calls .format
#   on the return value of log.write, not on the message. If `log` is an ordinary
#   file object this writes the literal "{0}" and then raises AttributeError on
#   None; the intent was presumably log.write((...).format(full_link)) - confirm.
# NOTE(review): find1 and the name span are dereferenced without a None check,
#   and `isbn_re.findall(...)[0]` raises IndexError when nothing matches.
# NOTE(review): the print statements make this Python 2; str() on scraped text
#   presumably raises UnicodeEncodeError for non-ASCII titles/authors - verify.
# NOTE(review): as stored here on a single physical line, the commented-out
#   `#book_name = whitespace_re.sub(...)` remnant turns everything after it on
#   this line into a comment.
i += 1 f2.write(full_link + '\n') book_details = request_passable(full_link) if not book_details: log.write(str(datetime.now()) + ":{0} skipped").format(full_link) continue detail_soup = BeautifulSoup(book_details.text) isbn = (detail_soup.find('span', itemprop="isbn")) if isbn is None: find1 = detail_soup.find('div', {"class": "bksfdpltrArea"}) if find1.ul.li.span.h2 is None: continue isbn = find1.ul.li.span.h2.text isbn13 = isbncon.convert(str(isbn)) else: isbn13 = str((isbn_re.findall(isbn.text))[0]) print isbn13 book_name = str((detail_soup.find('span', itemprop="name")).text) book_name = ' '.join((book_name).split()) print book_name #book_name = whitespace_re.sub("",book_name) author = detail_soup.find('a', {"class": "skuAuthorName"}) if author: author = str(author.text) else: author = "" print author book_prices = str(
# Fragment of a scraper loop body (collapsed onto one physical line): the tail
# of a `try` block whose opening is not visible, followed by its `except`.
# Visible steps, in order:
#   * skip the item when find4 is None; cost is the 4th space-separated token of
#     find4.text with commas removed
#   * author: text of span.ctbr-name, '' when absent
#   * skip the item when div#features is missing; ISBN is the first pattern2
#     match in its text, passed through isbncon.convert
#   * upsert into books_bookdata keyed on isbn: insert a new row when none
#     exists; otherwise update crossword_price/crossword_booklink, additionally
#     setting author_name first when row[2] is None
#   * commit
#   * on any Exception: print the traceback, append timestamp + traceback + url
#     to `log`, and `continue`
# NOTE(review): `except Exception, e` is Python 2-only syntax and `e` is unused.
# NOTE(review): author is assigned '' or find5.text just above, so
#   `not(author is None)` is presumably always true; the check that was meant
#   may be "author is non-empty" - confirm.
# NOTE(review): row[2] depends on the column order returned by `select *`
#   (presumably author_name); selecting the column by name would be safer.
# NOTE(review): both non-insert branches issue the same price/link update.
# NOTE(review): soup3.find('div', {"id": ...}, {"class": ...}) passes a second
#   dict positionally; in BeautifulSoup's find() the third positional parameter
#   is `recursive`, so the class filter is presumably not applied - verify.
# NOTE(review): `.split(" ")[3]` and `findall(...)[0]` raise IndexError on
#   unexpected page text; here that lands in the except handler below.
if find4 is None: continue cost=((find4.text).split(" "))[3] cost=cost.replace(",","") print cost find5=soup3.find('span',{"class":"ctbr-name"}) if find5 is None: author='' else: author=find5.text print(author) find6=soup3.find('div',{"id":"features"},{"class":"clearfix"}) if find6 is None: continue isbn=(pattern2.findall(find6.text))[0] isbn=isbncon.convert(str(isbn)) print isbn cur.execute("select * from books_bookdata where isbn=%(isbn)s",{"isbn":isbn}) row=cur.fetchone() if row is None: cur.execute("insert into books_bookdata(isbn,book_name,author_name,crossword_price,crossword_booklink) values(%s,%s,%s,%s,%s)",(isbn,title,author,cost,url)) elif (row[2] is None and not(author is None)): cur.execute("update books_bookdata set author_name=%s where isbn=%s",(author,isbn)) cur.execute("update books_bookdata set crossword_price=%s, crossword_booklink=%s where isbn=%s",(cost,url,isbn)) else: cur.execute("update books_bookdata set crossword_price=%s, crossword_booklink=%s where isbn=%s",(cost,url,isbn)) con.commit() except Exception, e: traceback.print_exc() log.write(str(datetime.now())+":"+traceback.format_exc()+":"+url) continue
# Fragment of a scraper loop body (collapsed onto one physical line; it starts
# on the `continue` of an unseen guard and is cut off inside a cur.execute call).
# Visible steps, in order:
#   * cost is the 4th space-separated token of find4.text with commas removed
#   * author: text of span.ctbr-name, '' when absent
#   * skip the item when div#features is missing; ISBN is the first
#     pattern_isbn match in its text, passed through isbncon.convert
#   * upsert into books_bookdata keyed on isbn: insert a new row when none
#     exists; when row[2] is None, set author_name and then begin a
#     price/link update (the rest is outside this view)
# NOTE(review): author is assigned '' or find5.text just above, so
#   `not (author is None)` is presumably always true; the check that was meant
#   may be "author is non-empty" - confirm.
# NOTE(review): row[2] depends on the column order returned by `select *`
#   (presumably author_name); selecting the column by name would be safer.
# NOTE(review): soup3.find('div', {"id": ...}, {"class": ...}) passes a second
#   dict positionally; in BeautifulSoup's find() the third positional parameter
#   is `recursive`, so the class filter is presumably not applied - verify.
# NOTE(review): `.split(" ")[3]` and `findall(...)[0]` raise IndexError on
#   unexpected page text; no handler is visible in this fragment.
# NOTE(review): as stored here on a single physical line, the first `#Debug:`
#   remnant turns everything after it on this line into a comment.
continue cost = ((find4.text).split(" "))[3] cost = cost.replace(",", "") #Debug:print cost find5 = soup3.find('span', {"class": "ctbr-name"}) if find5 is None: author = '' else: author = find5.text #Debug:print(author) find6 = soup3.find('div', {"id": "features"}, {"class": "clearfix"}) if find6 is None: continue isbn = (pattern_isbn.findall(find6.text))[0] isbn = isbncon.convert(str(isbn)) #Debug: print isbn cur.execute( "select * from books_bookdata where isbn=%(isbn)s", {"isbn": isbn}) row = cur.fetchone() if row is None: cur.execute( "insert into books_bookdata(isbn,book_name,author_name,crossword_price,crossword_booklink) values(%s,%s,%s,%s,%s)", (isbn, title, author, cost, url)) elif (row[2] is None and not (author is None)): cur.execute( "update books_bookdata set author_name=%s where isbn=%s", (author, isbn)) cur.execute( "update books_bookdata set crossword_price=%s, crossword_booklink=%s where isbn=%s",