def process_item(self, item, spider):
    """Store a scraped book item through ``book_dao`` and pass it on.

    Items with an empty ``ISBN`` field are dropped. Items whose
    ``platform`` equals 3 are returned untouched (the original comment
    describes these as comment items meant for the detail pipeline).
    Every other item is copied into a new ``Book`` and inserted.

    Args:
        item: Scraped item. The list-like fields (``name``, ``price``,
            ``author``, ``press``, ``instant``, ``img``, ``description``,
            ``ISBN``) contribute only their first element.
        spider: Not used by this function.

    Returns:
        The same ``item``, so that any later pipeline stage still
        receives it.

    Raises:
        DropItem: If ``item['ISBN']`` is empty.
    """
    if not item['ISBN']:
        # The item is rejected because it has no ISBN, so say so.
        raise DropItem('Missing ISBN, item dropped: %s' % item)
    if item['platform'] == 3:
        return item

    # (item key, Book attribute) pairs; a field is copied only when the
    # scraped list is non-empty.
    optional_fields = (
        ("name", "title"),
        ("price", "price"),
        ("author", "author"),
        ("press", "press"),
        ("instant", "instant_price"),
        ("img", "cover"),
        ("description", "description"),
    )

    new_book = Book()
    for key, attribute in optional_fields:
        values = item[key]
        if values:
            setattr(new_book, attribute, values[0])

    new_book.isbn = item["ISBN"][0]
    new_book.link = item["url"]
    new_book.platform = item['platform']
    # TODO(review): the original left "new_book.time = ?" unresolved;
    # no crawl time is recorded here.
    book_dao.insert(new_book)
    return item
def process_item(item, spider):
    """Save a scraped book through ``book_dao``; always return ``item``.

    Three kinds of item are handed back without touching the database:
    items already marked with ``platform == -1``, items with an empty
    ``ISBN`` (these get marked with ``platform = -1`` first), and items
    with ``platform == 3`` (described in the original comment as comment
    items for the detail pipeline).

    Args:
        item: Scraped item; list-like fields contribute their first
            element only.
        spider: Not used by this function.

    Returns:
        The ``item`` that was passed in.
    """
    # Already flagged as "not a book": nothing to do.
    if item['platform'] == -1:
        return item

    # No ISBN: flag the item as "not a book" and hand it back.
    if not item['ISBN']:
        item['platform'] = -1
        return item

    # Comment items are passed through unchanged.
    if item['platform'] == 3:
        return item

    record = Book()
    # Source key on the item -> attribute on the Book. Empty fields are
    # skipped so the Book keeps whatever default it has.
    for source_key, target_attr in (
        ("name", "title"),
        ("price", "price"),
        ("author", "author"),
        ("press", "press"),
        ("instant", "instant_price"),
        ("img", "cover"),
        ("description", "description"),
    ):
        scraped = item[source_key]
        if scraped:
            setattr(record, target_attr, scraped[0])

    record.isbn = item["ISBN"][0]
    record.link = item["url"]
    record.platform = item['platform']
    book_dao.insert(record)
    return item
def process_item(self, item, spider):
    """Copy a scraped book item into a ``Book`` and insert it.

    Items with an empty ``ISBN`` are returned without being stored.
    For all other items each list-like field contributes its first
    element; a field whose list is empty is skipped instead of raising
    ``IndexError``.

    ``price`` is taken from ``item["price"]`` and ``instant_price`` from
    ``item["instant"]``.

    Args:
        item: Scraped item with ``ISBN``, ``name``, ``price``,
            ``instant``, ``author``, ``press``, ``img``,
            ``description``, ``url`` and ``platform`` entries.
        spider: Not used by this function.

    Returns:
        The ``item`` that was passed in.
    """
    if not item['ISBN']:
        return item

    newbook = Book()
    # (item key, Book attribute); only non-empty fields are copied.
    for key, attribute in (
        ("name", "title"),
        ("price", "price"),
        ("author", "author"),
        ("press", "press"),
        ("instant", "instant_price"),
        ("img", "cover"),
        ("description", "description"),
    ):
        values = item[key]
        if values:
            setattr(newbook, attribute, values[0])

    newbook.isbn = item["ISBN"][0]
    newbook.link = item["url"]
    newbook.platform = item['platform']
    # TODO(review): the original left "newbook.time = ?" unresolved;
    # no crawl time is recorded here.
    book_dao.insert(newbook)
    return item
def test_insert():
    """Insert 100 synthetic ``Book`` rows through ``book_dao``.

    Rows come in pairs: ``i`` runs from 1 to 100 and the book index is
    ``(i + 1) // 2``, so indices 1..50 each appear twice, once with
    ``platform`` 1 (odd ``i``) and once with ``platform`` 0 (even ``i``).
    """
    for i in range(1, 101):
        book = Book()
        # Floor division keeps the index an integer on Python 2 and 3;
        # plain "/" yields fractional indices and prices on Python 3.
        index = (i + 1) // 2
        book.isbn = '%05d001' % (index)
        book.price = (index) * 1.0
        book.title = 'title-%d' % (index)
        book.author = 'author-%s' % (index)
        # Ten consecutive book indices share one press label.
        book.press = 'press-%s' % (index // 10)
        book.description = 'description for book-%d' % (index)
        book.cover = 'cover-%d' % (index)
        book.link = 'http://www.oricinus_price/book-%d-%d' % (index, i % 2)
        book.platform = i % 2
        book.instant_price = book.price * 0.8
        book.crawling_time = i
        book_dao.insert(book)
def test_insert():
    """Fill the store with 100 generated ``Book`` rows via ``book_dao``.

    For ``i`` in 1..100 the book index is ``(i + 1) // 2``, so each of
    the indices 1..50 is inserted twice: first with ``platform`` 1
    (odd ``i``), then with ``platform`` 0 (even ``i``). ``crawling_time``
    is simply ``i``.
    """
    for i in range(1, 101):
        # "//" rather than "/": the index must stay an integer on
        # Python 3 as well, otherwise prices and labels become fractional.
        index = (i + 1) // 2
        platform = i % 2

        book = Book()
        book.isbn = '%05d001' % (index)
        book.price = (index) * 1.0
        book.title = 'title-%d' % (index)
        book.author = 'author-%s' % (index)
        # Press label changes once every ten book indices.
        book.press = 'press-%s' % (index // 10)
        book.description = 'description for book-%d' % (index)
        book.cover = 'cover-%d' % (index)
        book.link = 'http://www.oricinus_price/book-%d-%d' % (index, platform)
        book.platform = platform
        book.instant_price = book.price * 0.8
        book.crawling_time = i
        book_dao.insert(book)
def __parse_po_to_book(self, bookpo, goodspo):
    """Merge a book record and a goods record into one ``Book``.

    Args:
        bookpo: Object supplying ``isbn``, ``price``, ``title``,
            ``author``, ``press``, ``description`` and ``cover``.
        goodspo: Object supplying ``link``, ``platform``,
            ``instant_price`` and ``crawling_time``.

    Returns:
        A new ``Book`` carrying the attributes listed above.
    """
    merged = Book()

    # Attributes taken from the book record.
    for field in ('isbn', 'price', 'title', 'author', 'press',
                  'description', 'cover'):
        setattr(merged, field, getattr(bookpo, field))

    # Attributes taken from the goods record.
    for field in ('link', 'platform', 'instant_price', 'crawling_time'):
        setattr(merged, field, getattr(goodspo, field))

    return merged
def process_item(self, item, spider):
    """Insert a scraped book through ``book_dao`` and return the item.

    Items with an empty ``ISBN`` are returned without being stored.
    The original guard was inverted (it returned early when an ISBN
    *was* present), so no valid book was ever inserted and empty-ISBN
    items failed on ``item["ISBN"][0]``.

    Args:
        item: Scraped item; list-like fields contribute their first
            element only, and empty fields are skipped.
        spider: Not used by this function.

    Returns:
        The ``item`` that was passed in.
    """
    if not item['ISBN']:
        return item

    new_book = Book()
    if item["name"]:
        new_book.title = item["name"][0]
    if item["price"]:
        new_book.price = item["price"][0]
    if item["author"]:
        new_book.author = item["author"][0]
    if item["press"]:
        new_book.press = item["press"][0]
    if item["instant"]:
        new_book.instant_price = item["instant"][0]
    if item["img"]:
        new_book.cover = item["img"][0]
    if item["description"]:
        new_book.description = item["description"][0]

    new_book.isbn = item["ISBN"][0]
    new_book.link = item["url"]
    new_book.platform = item['platform']
    # TODO(review): the original left "new_book.time = ?" unresolved;
    # no crawl time is recorded here.
    book_dao.insert(new_book)
    return item