def is_ok_status_code(self, status_code, pqhtml, url, resp):
    """Check the HTTP status of a fetched product page.

    Returns a ``(ok, payload)`` tuple.  For status 200 the result is
    ``(True, '')``.  For any other status the page title and URL are
    logged and the result is ``(False, tool.return_data(...))``, where the
    data is built by ``tool.get_off_shelf`` for a 404 and by
    ``tool.get_error`` for everything else.
    """
    if status_code == 200:
        return True, ''

    # Both failure kinds log the same record: timestamp, page title, URL.
    self.logger.info(json.dumps(
        dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    if status_code == 404:
        # Off the shelf: the product page no longer exists.
        failure = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
    else:
        # Any other non-200 status is reported as a fetch error.
        failure = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())

    return False, tool.return_data(successful=False, data=failure)
def detail(self, url):
    """Fetch a product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload when the response is a 404, any other non-200 status, or the
    page is not marked as in stock.  Exceptions raised while scraping
    propagate to the caller unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation.  (The former debug print of the area's HTML is gone: it
    # wrote the markup to stdout and failed on pages without the area.)
    area = pqhtml('.body-wrap .primary-wrap .product-area')
    siteObj = self.get_siteObj(pqhtml)

    # Off the shelf: availability meta is not "InStock" or the page says
    # "sold out".
    availability = area('meta[itemprop="availability"]').attr('content')
    if ('InStock' != availability
            or 'sold out' in area('.availability').text().lower()):
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    detail = dict()

    # Product ID: hidden "buy" input first, site object as fallback.
    productId = (
        area('input[class="buy"][name="buy"][type="hidden"]').attr('value')
        or self.get_product_id(siteObj))
    detail['productId'] = productId

    # Brand
    detail['brand'] = self.get_brand(siteObj)

    # Name
    detail['name'] = area('.product-title').text()

    # Currency
    currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Price
    price, listPrice = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Color, image and size information
    if area('.variation-dropdowns'):
        img, imgs, color, sizes = self.get_color_img_size(area, productId)
        detail['keys'] = color.keys()
    else:
        # No variations: single default color and size.
        img = area('.main-product-image a').attr('href')
        imgs = [
            li_a.attr('href').strip().replace('/300/300/', '/600/600/')
            for li_a in area('ul.product-thumbnails li a').items()
        ]
        color = self.cfg.DEFAULT_ONE_COLOR
        sizes = [dict(name=self.cfg.DEFAULT_ONE_SIZE,
                      id=productId,
                      sku=productId,
                      inventory=self.cfg.DEFAULT_STOCK_NUMBER)]

    # Color
    detail['color'] = color
    if isinstance(color, dict):
        detail['colorId'] = {cid: cid for cid in color.keys()}
    else:
        detail['colorId'] = productId

    # Image set
    detail['img'] = img
    detail['imgs'] = imgs

    # Sizes
    detail['sizes'] = sizes

    # Description
    detail['descr'] = (area('div[itemprop="description"]').text()
                       + area('div.product-more-details').text())

    # Details
    detail['detail'] = area('div.product-more-details').text()

    # Returns / exchanges
    detail['returns'] = area('.product-delivery-returns').text()

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL of the response
    detail['backUrl'] = resp.url

    # IP and port of the responding peer.
    # NOTE(review): `peer` is not a standard attribute of the underlying
    # response object; presumably it is patched in elsewhere -- confirm.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(str(part) for part in peer)

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload when the response is a 404, any other non-200 status, the
    add-button text says the item is sold out, or ``get_sizes`` reports
    ``'sold out'``.  Exceptions raised while scraping propagate unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.  Off-shelf payloads carry
    # the SOLD_OUT message (previously this branch passed the generic
    # fetch-error message, unlike the sizes-based branch below).
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Strip <script> and <style> nodes from the parsed document before
    # querying it.
    pqhtml.remove('script').remove('style')
    area = pqhtml('div#product-summary')

    # Off the shelf: the add-to-cart button says "sold out".
    button_text = area('#product-form .add-button').text().lower()
    if u'售罄' in button_text or u'sold out' in button_text:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    detail = dict()

    # All images
    imgs = self.get_imgs(pqhtml)
    detail['imgs'] = imgs
    detail['img'] = imgs[0]

    # Name
    detail['name'] = area('h1.brand').text() + ' ' + area('.name').text()

    # Currency: first whitespace-separated token of the regular price.
    currency = area('span.regular-price').text().split()[0]
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Price
    price, listPrice = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Color and sizes
    color, sizes = self.get_sizes(area)
    detail['color'] = color
    detail['sizes'] = sizes

    # Off the shelf: get_sizes signals it with the string 'sold out'.
    if isinstance(sizes, basestring) and sizes == 'sold out':
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Description
    detail['descr'] = (
        area('div#description').text()
        or pqhtml('#product-details .product-details-section').text())

    # Brand
    detail['brand'] = area('h1.brand').text()

    # Product ID (also used as the color ID)
    prodId = area.attr('data-id')
    detail['productId'] = prodId
    detail['colorId'] = prodId

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL of the response
    detail['backUrl'] = resp.url

    # IP and port of the responding peer.
    # NOTE(review): `peer` is not a standard attribute of the underlying
    # response object; presumably it is patched in elsewhere -- confirm.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(str(part) for part in peer)

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a Yintai product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, or a page without a
    "buy now" button.

    When scraping raises an exception whose text contains
    ``'get YinTai_TagData Fail'`` the whole call is retried, up to 10
    times (tracked in ``self._retry``); after that a ``ValueError`` is
    raised.  The counter is reset once a call completes or gives up, so
    earlier failures do not eat into the retry budget of later calls.
    Other exceptions propagate unchanged.
    """

    def _scrape():
        """Run one scraping attempt and return the ``return_data`` result."""
        # NOTE: TLS certificate verification is disabled for this request.
        resp = requests.get(url, verify=False)
        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def _reject(build, message):
            """Log title/URL and return a failure payload made by *build*."""
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(),
                     url=url)))
            data = build(code=status_code, message=message,
                         backUrl=resp.url, html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors.
        if status_code != 200:
            return _reject(tool.get_error,
                           self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        # Preparation.
        area = pqhtml('#bd .grid')
        pdata = self.get_pdata(pqhtml)

        # Off the shelf: no "buy now" button on the page.
        if len(area('.p-buy #addCart .buynow')) == 0:
            return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # There should be exactly one product in the page data.
        if len(pdata['prods']) != 1:
            raise ValueError('yintai product data length great than 1')
        prod = pdata['prods'][0]

        detail = dict()

        # Brand
        detail['brand'] = area('h4.y-pro-cooper-name').text()

        # Name
        detail['name'] = prod['name']

        # Currency
        currency = 'CNY'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price.  When the page advertises a direct markdown, the price is
        # read from a POST to the same URL instead of the page data.
        price = prod['price']
        if u'直降' in area('#Y_ProBen').text():
            self.session.headers['Referer'] = url
            self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
            self.session.headers['Origin'] = url
            subArea = PyQuery(
                requests.post(url,
                              data=dict(),
                              headers=self.session.headers,
                              cookies=resp.cookies).text)
            price = re.search(
                r'(\d[\d\.]*)',
                subArea('.marketPriceNum .yt-num').text()).groups()[0]
            price = price + subArea('.marketPriceNum .yt-num em').text()

        # Original note: this price business logic is to be removed later.
        detail['price'] = float(price)
        detail['listPrice'] = prod['mPrice']

        # Color: link-style swatch first, selected swatch as a fallback
        # (fallback added 2016-12-15).
        swatches = area('.productInfo .s-s-color').next()
        color = (swatches('a[href="Javascript:void(0);"]').text()
                 or swatches('.selected a').text())
        detail['color'] = color
        detail['colorId'] = prod['colorID']

        # Image set
        imgs = self.get_imgs(area)
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Product ID
        detail['productId'] = prod['sku']

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('.yp-con-desc').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # Give the session a fresh cookie jar and header set.
        self.session.cookies = RequestsCookieJar()
        self.session.headers = tool.get_one_header()

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)

    try:
        result = _scrape()
    except Exception as e:
        if 'get YinTai_TagData Fail' in str(e) and self._retry < 10:
            self._retry += 1
            return self.detail(url)
        elif self._retry >= 10:
            # Give up, but reset the counter so the next URL starts with a
            # full retry budget.  NOTE: the message text says "five" while
            # the limit is 10; the string is kept as-is for callers that
            # may match on it.
            self._retry = 0
            raise ValueError('yintai retry five times ,{0}'.format(str(e)))
        else:
            raise

    # The attempt completed: clear any retries it consumed.
    self._retry = 0
    return result
def detail(self, url):
    """Fetch a Superdry product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, a page without a
    ``.product-detail`` area, or one marked "out of stock".  Exceptions
    raised while scraping propagate unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation.
    area = pqhtml('.product-detail')
    detail_tab = pqhtml('#product-detail-tabs')
    img_tab = pqhtml('div.images')
    pdata = self.get_pdata(pqhtml)

    # Off the shelf: no product area, or it is flagged out of stock.
    if not area or 'out of stock' in area('.out-of-stock').text():
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    video_prefix = 'http://image1.superdry.com/static/images/products/'
    product = pdata['product']

    detail = dict()
    detail['stock'] = product['stock']  # total stock of this product
    detail['video'] = video_prefix + product['video']
    detail['gender'] = product['gender']
    detail['season'] = product['season']
    detail['category'] = product['category']
    detail['productSku'] = product['sku_code']
    detail['size_guide'] = product['size_guide']
    detail['subcategory'] = product['subcategory']
    detail['productCode'] = product['sku_code']

    # Product ID
    detail['productId'] = product['id']

    # Brand
    detail['brand'] = 'SUPERDRY'

    # Name
    detail['name'] = product['name']

    # Currency
    currency = product['currency']
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Price
    detail['price'] = product['unit_sale_price']
    detail['listPrice'] = product['unit_price']

    # Description
    detail['descr'] = product['description']

    # Details
    detail['detail'] = detail_tab.text()

    # Returns / exchanges
    # NOTE(review): 'tab-page:last' selects an element *named* tab-page;
    # possibly '.tab-page:last' was intended -- confirm against the page.
    detail['returns'] = detail_tab('tab-page:last').text()

    # Color (the color value doubles as its ID)
    detail['color'] = product['color']
    detail['colorId'] = product['color']

    # Image set: thumbnail URLs rewritten to their zoom variants.
    detail['img'] = img_tab('.scroller li img:first').attr('src').replace(
        '/productthumbs/', '/zoom/')
    detail['imgs'] = [
        ele.attr('src').replace('/productthumbs/', '/zoom/')
        for ele in img_tab('.scroller li img').items()
    ]

    # Sizes
    detail['sizes'] = self.get_sizes(area)

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL of the response
    detail['backUrl'] = resp.url

    # IP and port of the responding peer.
    # NOTE(review): `peer` is not a standard attribute of the underlying
    # response object; presumably it is patched in elsewhere -- confirm.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(str(part) for part in peer)

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, or a page from which
    ``get_pdata`` extracts nothing.  Every rejection logs the page title
    and URL.  Exceptions raised while scraping propagate unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation: product data is parsed out of the page's script text.
    Jtxt = pqhtml('script').text()
    pdata = self.get_pdata(Jtxt)
    area = pqhtml('#detail-display-wrapper')

    # Off the shelf: no product data could be extracted.
    if not pdata:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    info = pdata['Products']['Info']
    detail = dict()

    # Name
    detail['name'] = pqhtml('h2.detail-title').text()

    # Brand
    detail['brand'] = self.get_brand(area)

    # Currency
    currency = pqhtml('meta[itemprop="priceCurrency"]:first').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Color
    detail['color'] = pqhtml(
        'ul.detail-additional-info:first>li:last').text()

    # Price: sale price from the page data; list price from the markup
    # with its first character (presumably the currency sign) dropped.
    detail['price'] = info['BasePrice'].replace(',', '')
    detail['listPrice'] = (
        pqhtml('span.strokeText>span.price').text()
        or pqhtml('div#detail-display-info-wrapper span.price').text())[1:]

    # Image set
    imgsTmp = [
        li.attr('data-zoom')
        for li in pqhtml('div#detail-display-icon ul').children('li').items()
    ]
    detail['img'] = imgsTmp[0]
    detail['imgs'] = imgsTmp

    # Sizes
    detail['sizes'] = self.get_sizes(pdata, area)

    # Description
    detail['descr'] = area('p.detail-description:first').text()

    # Product ID (also used as the color ID)
    detail['productId'] = info['ParentProductId']
    detail['colorId'] = info['ParentProductId']

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Return URL (the requested URL, not the final response URL)
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, a page that
    ``checkSoldOut`` flags, or one for which ``get_sizes`` returns
    ``None``.  A ``TooManyRedirects`` raised anywhere during scraping is
    logged and reported as off the shelf with ``code=0``.  All other
    exceptions propagate unchanged.

    Side effect: sets ``self.link_area`` to the two-character region taken
    from the ``/en-xx/`` segment of *url*.
    """
    try:
        # NOTE: TLS certificate verification is disabled for this request.
        resp = self.session.get(url, verify=False)
        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def _reject(build, message):
            """Log title/URL and return a failure payload made by *build*."""
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(),
                     url=url)))
            data = build(code=status_code, message=message,
                         backUrl=resp.url, html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors.
        if status_code != 200:
            return _reject(tool.get_error,
                           self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        area = pqhtml('#content')
        self.link_area = re.search(r'/en-(\w{2})/', url).groups()[0]

        # Off the shelf: the page itself is flagged as sold out.
        if self.checkSoldOut(pqhtml):
            return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        pdata = self.get_pdata(area)
        detail = dict()

        # Brand
        brand = pdata['brand']['name']
        detail['brand'] = brand

        # Name
        detail['name'] = brand + ' ' + pdata['name']

        # Currency
        currency = pdata['price']['currency']
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(pdata)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Image set
        imgsTmp = self.get_imgs(area)
        detail['img'] = imgsTmp[0]
        detail['imgs'] = imgsTmp

        # Sizes; None from get_sizes means the product is off the shelf.
        sizesTmp = self.get_sizes(pdata)
        if sizesTmp is None:
            return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Normalise a lone "one size" entry to the configured default name.
        if len(sizesTmp) == 1 and sizesTmp[0]['name'].lower() in [
                'one size', 'onesize']:
            sizesTmp[0]['name'] = self.cfg.DEFAULT_ONE_SIZE
        detail['sizes'] = sizesTmp

        # Video
        if 'videos' in pdata and pdata['videos']:
            detail['video'] = self.get_video(pdata)

        accordion = 'section.product-accordion--desktop>section'

        # Product note
        detail['note'] = area(accordion + ':first').text()

        # Size and fit
        detail['sizeFit'] = area(accordion + ':eq(1)').text()

        # Product details
        detail['detail'] = area(accordion + ':eq(2)').text()

        # Delivery
        detail['delivery'] = area(accordion + ':last').text()

        # Returns (read from the same section as delivery)
        detail['returns'] = area(accordion + ':last').text()

        # Description
        detail['descr'] = self.get_descr(area)

        # Product ID
        detail['productId'] = pdata['id']

        # Color
        colour_info = pdata['colourInfo']
        if colour_info:
            detail['color'] = colour_info[0]['colourName']
        else:
            detail['color'] = self.cfg.DEFAULT_ONE_COLOR

        # Color ID
        if colour_info:
            detail['colorId'] = (colour_info[0]['colourId']
                                 or self.cfg.DEFAULT_COLOR_SKU)
        else:
            detail['colorId'] = pdata['id']

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)

    except TooManyRedirects as e:
        # A redirect loop is reported as an off-shelf product.
        self.logger.exception(e)
        data = tool.get_off_shelf(code=0,
                                  message=self.cfg.SOLD_OUT,
                                  backUrl=url,
                                  html=str(e))
        return tool.return_data(successful=False, data=data)
def detail(self, url):
    """Fetch a product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404 or any other non-200 status.  Exceptions raised
    while scraping propagate unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation: product data is read from the document <head>.
    area = pqhtml('.product-single-section-main')
    pdata = self.get_pdata(pqhtml('head'))
    product = pdata['product']

    detail = dict()

    # Brand
    detail['brand'] = product['vendor']

    # Name
    detail['name'] = area('h1[itemprop="name"]').text()

    # Currency
    currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Price
    price, listPrice = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Color: single default color, keyed by the product ID.
    detail['color'] = self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = product['id']

    # Image set: 'http:' is prepended to each image src.
    imgs = [
        'http:' + img.attr('src')
        for img in area('.super-slider-main img').items()
    ]
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Product ID
    detail['productId'] = product['id']

    # Sizes
    detail['sizes'] = self.get_sizes(pdata, area)

    # Description
    detail['descr'] = area('.product-single-details-dropdown').text()

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL of the response
    detail['backUrl'] = resp.url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a COS product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, a page without a product
    container, or one whose add-to-basket button is disabled.  Exceptions
    raised while scraping propagate unchanged.

    Side effect: stores ``tool.get_domain(url)`` in ``self.domain``.
    """
    self.domain = tool.get_domain(url)
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation.  The get_pdata result is not used below; the call is
    # kept as-is because it may raise or have side effects.
    area = pqhtml('#content>#productContainer')
    pdata = self.get_pdata(pqhtml)

    # Off the shelf: no product container, or add-to-basket is disabled.
    if not area or area('.productButtons #disabledAddtobasket'):
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    detail = dict()

    # Brand
    detail['brand'] = 'COS'

    # Name
    detail['name'] = area('.productInfo h1:first').text()

    # Currency
    currency = pqhtml('meta[property="og:price:currency"]').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Price
    price, listPrice = self.get_all_price(pqhtml, area)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Color: every color key maps to itself as its ID.
    color = self.get_color(area)
    detail['color'] = color
    detail['colorId'] = {key: key for key in color.keys()}

    # Image set: a flat list, or a dict of per-color lists.
    imgs = self.get_imgs(area)
    if isinstance(imgs, list):
        detail['img'] = imgs[0]
    else:
        detail['img'] = {cid: group[0] for cid, group in imgs.items()}
    detail['imgs'] = imgs

    # Keys
    detail['keys'] = color.keys()

    # Product ID
    detail['productId'] = area(
        'input[data-product-identifier!=""]').attr('data-product-identifier')

    # Sizes
    detail['sizes'] = self.get_sizes(area)

    # Description
    detail['descr'] = area('.productInfo>.infowrap>dl>dd:first').text()

    # Returns / exchanges (read from the same node as the description)
    detail['returns'] = area('.productInfo>.infowrap>dl>dd:first').text()

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Return URL (the requested URL, not the final response URL)
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch an iHerb product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, or a page whose stock
    status says the item is out of stock.  Exceptions raised while
    scraping propagate unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation.  The stock status text is read once and reused for
    # both the off-shelf check and the inventory figure.
    area = pqhtml('.product-detail-container')
    stock_txt = area('#stock-status').text()

    # Off the shelf: the stock status says "out of stock".
    if u'缺货' in stock_txt:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    detail = dict()

    # Brand
    detail['brand'] = (area('#brand:first span').text()
                       or area('#brand a').text())

    # Name
    detail['name'] = area('#name').text()

    # Currency
    currency = area('#price-currency').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Price
    price, listPrice = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Color: single default color.
    detail['color'] = self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Image set: thumbnails' large images, then the zoom images, then the
    # summary image link as a last resort.
    imgs = [
        a.attr('data-large-img')
        for a in area('.image-container .thumbnail-container img').items()
    ] or [
        img.attr('src')
        for img in area('#iherb-product-zoom img').items()
    ]
    imgs = imgs or [
        area('#product-image .product-summary-image a').attr('href')
    ]
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Product ID
    productId = area('input[name="pid"]').attr('value')
    detail['productId'] = productId

    # Sizes: one default size; inventory is the last quantity option when
    # the page reports the item in stock, otherwise 0.
    if 'In Stock' in stock_txt or u'有库存' in stock_txt:
        inv = area('#ddlQty option:last').attr('value')
    else:
        inv = 0
    detail['sizes'] = [
        dict(name=self.cfg.DEFAULT_ONE_SIZE,
             inventory=inv,
             id=productId,
             sku=productId)
    ]

    # Description
    detail['descr'] = area('#product-specs-list li').text()

    # Details
    detail['detail'] = pqhtml('div[itemprop="description"]').text()

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Return URL (the requested URL, not the final response URL)
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a Lululemon product page and scrape it into a detail dict.

    Returns the result of ``tool.return_data``: ``successful=True`` with
    the scraped fields, or ``successful=False`` with an off-shelf / error
    payload for a 404, any other non-200 status, or a page without a
    ``#pdp-page`` area.  Exceptions raised while scraping propagate
    unchanged.
    """
    # NOTE: TLS certificate verification is disabled for this request.
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def _reject(build, message):
        """Log title/URL and return a failure payload made by *build*."""
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))
        data = build(code=status_code, message=message,
                     backUrl=resp.url, html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    # Other errors.
    if status_code != 200:
        return _reject(tool.get_error,
                       self.cfg.GET_ERR.get('SCERR', 'ERROR'))

    # Preparation.
    area = pqhtml('#pdp-page')

    # Off the shelf: the product area is missing.
    if not area:
        return _reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

    detail = dict()

    # Product ID: the part of the style number before the first '_'.
    productId = pqhtml('img[data-stylenumber!=""]').attr(
        'data-stylenumber').split('_')[0]
    detail['productId'] = productId

    # Brand
    detail['brand'] = 'Lululemon'

    # Name
    detail['name'] = area('h1.OneLinkNoTx').text()

    # Currency
    currency = pqhtml('input#currencyCode').attr('value').strip()
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # List price.  The sale price returned here is superseded by the one
    # from get_sizes below, so it is not stored.
    _, listPrice = self.get_all_price(area)
    detail['listPrice'] = listPrice

    # Description
    detail['descr'] = self.get_descr(pqhtml, area)

    # Details
    detail['detail'] = area('#fabric').text()

    # Returns / exchanges
    detail['returns'] = ''

    colorDriver, colorCount = self.get_pdata(pqhtml)

    # Color.  Only a dict of colors is expanded into per-color IDs; any
    # other value falls back to the product ID instead of being iterated
    # element by element.
    img, imgs, color = self.get_color(area, colorCount)
    detail['color'] = color
    if isinstance(color, dict):
        detail['colorId'] = {key: key for key in color}
    else:
        detail['colorId'] = productId

    # Image set
    detail['img'] = img
    detail['imgs'] = imgs

    # Sizes, plus the price that goes with them.
    sizes, price = self.get_sizes(colorDriver)
    detail['sizes'] = sizes
    detail['price'] = price

    if isinstance(color, dict):
        detail['keys'] = list(color)

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL of the response
    detail['backUrl'] = resp.url

    # IP and port of the responding peer.
    # NOTE(review): `peer` is not a standard attribute of the underlying
    # response object; presumably it is patched in elsewhere -- confirm.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(str(part) for part in peer)

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response, or an availability text other than ``Available``,
    yields an off-shelf payload; any other non-200 status yields an error
    payload.  Otherwise the collected product dict is returned with
    ``successful=True``.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page is gone.
    if status_code == 404:
        return off_shelf()

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    area = pqhtml('.page-content')
    domain = tool.get_domain(url)

    # Off the shelf: the availability marker does not say "Available".
    availability = area('div[itemprop="availability"]').text().strip()
    if availability != 'Available':
        return off_shelf()

    detail = dict()

    # Brand and name.
    detail['brand'] = 'Kit and Ace'
    detail['name'] = area('h1[itemprop="name"]').text()

    # Currency.
    currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices.
    price, list_price = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = list_price

    # Colors.
    color = self.get_color(area)
    detail['color'] = color
    detail['colorId'] = dict((key, key) for key in color.keys())

    # Images: a list gives one lead image, a mapping gives one per color.
    imgs = self.get_imgs(area, domain)
    if isinstance(imgs, list):
        detail['img'] = imgs[0]
    else:
        detail['img'] = dict((cid, group[0]) for cid, group in imgs.items())
    detail['imgs'] = imgs

    # Keys.
    detail['keys'] = color.keys()

    # Product ID.
    detail['productId'] = area('.js-pdp-product-code').attr('data-product-id')

    # Sizes.
    detail['sizes'] = self.get_sizes(area)

    # Description, fabric, details, returns.
    detail['descr'] = area('.pdp-desc__description').text()
    detail['fabric'] = area('.pdp-info-components').text()
    detail['detail'] = area('.productDetailsPageSection1').text()
    detail['returns'] = area('.productInfo>.infowrap>dl>dd:first').text()

    # Model information.
    detail['model'] = self.get_model(area, color.keys())

    # HTTP status, sale status, return URL (the requested URL here).
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    When the page carries a ``meta[http-equiv="refresh"]`` node, the method
    sleeps for half of the announced delay, sets the ``INSTART_SESSION_ID``
    cookie and fetches the page a second time.  A 404 response, a missing
    product form or a ``.cannotorder`` node yields an off-shelf payload; any
    other non-200 status yields an error payload.  Any exception is logged
    together with the current document and re-raised.
    """
    pqhtml = ''
    try:
        resp = self.session.get(url, verify=False)
        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Every early exit logs the page title together with the URL.
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url)))

        def off_shelf():
            log_page()
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Off the shelf: the page is gone.
        if status_code == 404:
            return off_shelf()

        # Any other unexpected status.
        if status_code != 200:
            log_page()
            payload = tool.get_error(
                code=status_code,
                message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                backUrl=resp.url,
                html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Preparation.
        area = pqhtml('form[name="productPage"]')
        tool.get_domain(url)  # result is not used by this method

        refresh_node = pqhtml('meta[http-equiv="refresh"]')
        if len(refresh_node):
            # The content attribute starts with the delay before ';'.
            delay_text = refresh_node.attr('content').strip().split(';')[0]
            sleep_seconds = int(delay_text) / 2
            time.sleep(sleep_seconds)

            stamp = str(int((time.time() - sleep_seconds) * 1000))
            self.session.cookies.set('INSTART_SESSION_ID', stamp)

            # NOTE(review): status_code still holds the status of the first
            # response after this re-fetch -- confirm that is intended.
            resp = self.session.get(url, verify=False)
            pqhtml = PyQuery(resp.text)
            area = pqhtml('form[name="productPage"]')

        # Off the shelf: no product form, or the page says it cannot be ordered.
        if not area or len(area('.cannotorder')):
            return off_shelf()

        product_id = area('input[name$="productId"][value!=""]').attr('value')
        pdata = self.get_pdata(area, product_id)

        detail = dict()

        # Product ID, brand, name.
        detail['productId'] = product_id
        detail['brand'] = area('input.cmDesignerName').attr('value')
        detail['name'] = area('h1.product-name:first').text()

        # Currency.
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices.
        price, list_price = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = list_price

        # Description and details.
        detail['descr'] = area('div[itemprop="description"]').text()
        detail['detail'] = area('.product-details-info').text()

        # Images.
        img, imgs = self.get_imgs(area)
        detail['img'] = img
        detail['imgs'] = imgs

        # Sizes; a mapping of sizes also provides keys and color ids.
        sizes = self.get_sizes(pdata)
        detail['sizes'] = sizes
        if isinstance(sizes, dict):
            detail['keys'] = sizes.keys()
            detail['color'] = dict((key, key) for key in sizes)
            detail['colorId'] = dict((key, key) for key in sizes)

        # HTTP status, sale status, final URL.
        detail['status_code'] = status_code
        detail['status'] = self.cfg.STATUS_SALE
        detail['backUrl'] = resp.url

        # Peer address of the response, when present.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join([str(part) for part in peer])

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)

    except Exception:
        # Keep the document that was being parsed next to the traceback.
        self.logger.exception('html:{0}'.format(pqhtml))
        raise
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response yields an off-shelf payload and any other non-200 status
    an error payload (both ``successful=False``).  Otherwise the collected
    product dict is returned with ``successful=True``.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    # Off the shelf: the page is gone.
    if status_code == 404:
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    area = pqhtml('.product-detail-information')

    detail = dict()

    # Brand: first word of the brand image's alt text.
    brand_alt = pqhtml('.product-brand img:first').attr('alt')
    detail['brand'] = brand_alt.split()[0]

    # Name, with a fallback selector when the first one is empty.
    detail['name'] = (area('.J_title_name').text()
                      or area('.title-name').text())

    # Currency: first word of the currency selector's text.
    currency = pqhtml('a#select_currency').text().split()[0]
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices.
    price, list_price = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = list_price

    # Color: configured defaults.
    detail['color'] = self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Images: hrefs of the preview toolbar links.
    imgs = []
    for link in pqhtml('.product-detail-preview .toolbar>li>a').items():
        imgs.append(link.attr('href'))
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Product ID.
    detail['productId'] = area('.product-detail-selection-sku').text()

    # Sizes.
    detail['sizes'] = self.get_sizes(area)

    # Description and details both come from the same tab.
    detail['descr'] = area('#product-description-tab').text()
    detail['detail'] = area('#product-description-tab').text()

    # Returns.
    detail['returns'] = area('.product-directions').text()

    # HTTP status, sale status, return URL (the requested URL here).
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response yields an off-shelf payload and any other non-200 status
    an error payload (both ``successful=False``).  Otherwise the collected
    product dict is returned with ``successful=True``.  As a side effect
    ``self.domain`` is set from *url*.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    # Off the shelf: the page is gone.
    if status_code == 404:
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    self.domain = tool.get_domain(url)

    detail = dict()

    # Brand, falling back to a fixed label when the page gives none.
    brand = self.getBrandByHtml(pqhtml).strip()
    detail['brand'] = brand or 'MYGEEK'

    # Name.
    detail['name'] = pqhtml('span.title strong').text().strip()

    # Currency is fixed for this source.
    currency = 'CNY'
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices.
    price, list_price = self.getPriceByHtml(pqhtml)
    detail['price'] = price
    detail['listPrice'] = list_price

    # Colors; only a mapping provides color ids and keys.
    color = self.get_color(pqhtml)
    detail['color'] = color
    if isinstance(color, dict):
        detail['colorId'] = dict((k, k) for k in color.keys())
        detail['keys'] = color.keys()
    else:
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Images.
    imgs = self.getImgsByHtml(pqhtml)
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Product ID: digits after "id=" in the first form's action.
    form_action = pqhtml('div.pid5 form:first').attr('action')
    detail['productId'] = re.search(r'id=(\d*)', form_action).group(1)

    # Sizes.
    detail['sizes'] = self.getSizesByHtml(pqhtml)

    # Description: two sections with their titles and scripts removed.
    first_part = pqhtml('#pid1_2').remove('.title').remove('script').text()
    second_part = pqhtml('.pid2').remove('.title').remove('script').text()
    detail['descr'] = first_part + second_part

    # HTTP status, sale status, final URL.
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = resp.url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response, or a falsy result from ``self.get_pdata``, yields an
    off-shelf payload; any other non-200 status yields an error payload.
    Otherwise the collected product dict is returned with
    ``successful=True``.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page is gone.
    if status_code == 404:
        return off_shelf()

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    pdata = self.get_pdata(pqhtml)
    area = pqhtml('.productDetailSummary')
    pinfo = pqhtml('#productInfo')
    img_path = url.split('/')[3]  # fourth '/'-separated piece of the URL

    # Off the shelf: no product data could be extracted.
    if not pdata:
        return off_shelf()

    detail = dict()

    # Name and brand.
    detail['name'] = pqhtml('.productName').text()
    detail['brand'] = pqhtml('.productName a').text()

    # Currency.
    currency = area('span[itemprop="priceCurrency"]').text()
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices.
    price, list_price = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = list_price

    # Images.
    img, imgs = self.get_imgs(area, img_path)
    detail['img'] = img
    detail['imgs'] = imgs

    # Video, only when the gallery contains one.
    if len(area('.MagicScroll .productVideo')) > 0:
        video_link = area('.MagicScroll a.productVideo')
        detail['video'] = video_link.attr('data-video-url')

    # Colors and sizes.
    colors, sizes = self.get_colors_sizes(area, pdata)
    detail['color'] = colors
    detail['sizes'] = sizes
    detail['keys'] = colors.keys()
    detail['colorId'] = dict((key, key) for key in colors.keys())

    # Product ID.
    detail['productId'] = area('input#baseNo').attr('value')

    # Description and details.
    detail['descr'] = pinfo('#overview').text()
    detail['detail'] = pinfo('#specs').text()

    # HTTP status, sale status, return URL (the requested URL here).
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response yields an off-shelf payload and any other non-200 status
    an error payload (both ``successful=False``).  Otherwise the collected
    product dict is returned with ``successful=True``.

    Raises:
        ValueError: when the currency symbol returned by
            ``self.get_price_info`` is not ``$``.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    # Off the shelf: the page is gone.
    if status_code == 404:
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    area = pqhtml('#theater')
    tool.get_domain(url)  # result is not used by this method
    pdata = self.get_pdata(pqhtml('script:gt(20)'))

    detail = dict()

    # Brand and name.
    detail['brand'] = area('.brand').text()
    detail['name'] = area('h1:first').text()

    # Only dollar prices are accepted.
    symbol, price, list_price = self.get_price_info(pdata)
    if symbol != '$':
        raise ValueError('currencySymbol is not USD')

    # Currency.
    currency = 'USD'
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices.
    detail['price'] = price
    detail['listPrice'] = list_price

    # Colors.
    color = self.get_color(pdata)
    detail['color'] = color
    detail['colorId'] = dict((cid, cid) for cid in color.keys())

    # Images.
    img, imgs = self.get_imgs(pdata)
    detail['img'] = img
    detail['imgs'] = imgs

    # Product ID.
    detail['productId'] = pqhtml('input[name="productId"]').attr('value')

    # Sizes.
    sizes = self.get_sizes(pdata)
    detail['sizes'] = sizes

    # Description.
    detail['descr'] = area('.description').text()

    # Keys: ids that have both an image and a size entry.
    detail['keys'] = set(img.keys()).intersection(set(sizes.keys()))

    # HTTP status, sale status, final URL.
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = resp.url

    # Peer address of the response, when present.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join([str(part) for part in peer])

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response, or product data flagged as sold out or
    shipping-restricted, yields an off-shelf payload.  Any other non-200
    status, or a page containing an ``.error_message`` node, yields an error
    payload.  Otherwise the collected product dict is returned with
    ``successful=True``.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page is gone.
    if status_code == 404:
        return off_shelf()

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(
            code=status_code,
            message='status_code:{0},need 200, message:{1}'.format(
                status_code, self.cfg.GET_ERR.get('SCERR', 'ERROR')),
            backUrl=resp.url,
            html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # The page itself reports an error.
    if len(pqhtml('.error_message')) >= 1:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SAKERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    area = pqhtml('#pdp-content-area')
    pdata = self.get_pdata(area)

    # Off the shelf: sold out, or shipping is restricted.
    sold_out = pdata['sold_out_message']['enabled']
    if sold_out or pdata['intl_shipping_restriction']['enabled']:
        return off_shelf()

    detail = dict()

    # Brand: only when the brand entry is enabled.
    if pdata['brand_name']['enabled']:
        detail['brand'] = pdata['brand_name']['label']
    else:
        detail['brand'] = ''

    # Name.
    detail['name'] = pdata['short_description']

    # Currency.
    currency = pdata['price']['list_price']['local_currency_code']
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Base prices are computed here, but the values finally reported are
    # the per-size ones returned by get_sizes below.
    self.get_all_price(pdata)

    # Colors are keyed by the color's id (as a string).
    color = {}
    color_id = {}
    for entry in pdata['colors']['colors']:
        cid = str(entry['id'])
        color[cid] = entry['label']
        color_id[cid] = cid
    detail['color'] = color or self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = color_id or self.cfg.DEFAULT_COLOR_SKU

    # Images: a list gives one lead image, a mapping gives one per color.
    imgs = self.get_imgs(pdata, area)
    if isinstance(imgs, list):
        detail['img'] = imgs[0]
    else:
        detail['img'] = dict((cid, group[0]) for cid, group in imgs.items())
    detail['imgs'] = imgs

    # Sizes, together with the prices that are reported.
    size_price, size_list_price, sizes = self.get_sizes(pdata)

    # Keys: prefer the size keys, fall back to the color keys.
    size_keys = sizes.keys()
    if size_keys:
        detail['keys'] = size_keys
    elif color:
        detail['keys'] = color.keys()

    # Product ID.
    detail['productId'] = pdata['product_code']

    detail['sizes'] = sizes
    detail['price'] = size_price
    detail['listPrice'] = size_list_price

    # Description: text content of the description markup.
    detail['descr'] = PyQuery(pdata['description']).text()

    # Returns.
    detail['returns'] = pdata['simple_shipping_statement']['message']

    # HTTP status, sale status, return URL (the requested URL here).
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response or a page without an ``#itemContent`` node yields an
    off-shelf payload; any other non-200 status yields an error payload.
    Otherwise the collected product dict is returned with
    ``successful=True``.  Several fields are read from ``tc_vars[...]``
    assignments in the page's script text; a missing assignment raises
    ``AttributeError`` from the failed regex match.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page is gone.
    if status_code == 404:
        return off_shelf()

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    script_text = pqhtml('script').text()
    area = pqhtml('#itemContent')

    # Off the shelf: the item container is missing.
    if not area:
        return off_shelf()

    def tc_var(key):
        # Value of the tc_vars["<key>"] = "..."; assignment in the scripts.
        pattern = r'tc_vars\["' + key + r'"\] =\s*"(.*?)";'
        return re.search(pattern, script_text, re.DOTALL).group(1)

    json_match = re.search(r'jsInit.item.colorSizeJson =\s*(.*?\}\});\s*',
                           script_text, re.DOTALL)
    pdata = json.loads(json_match.groups()[0])

    detail = dict()

    # Name: the title variable must be present, but the reported name is
    # the text of the title node.
    tc_var('product_title')
    detail['name'] = area('#itemTitle').text()

    # Brand: title node first, script variable as fallback.
    script_brand = tc_var('product_brand')
    detail['brand'] = (area('#itemTitle span[itemprop="brand"]').text()
                       or script_brand)

    # Currency.
    currency = tc_var('nav_currency')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices.
    detail['price'] = tc_var('product_discountprice')
    detail['listPrice'] = tc_var('product_price')

    # Images: a mapping gives one lead image per color, a list just one.
    all_imgs = self.get_imgs(script_text, area, pdata)
    if isinstance(all_imgs, dict):
        detail['img'] = dict((cid, group[0])
                             for cid, group in all_imgs.items())
    else:
        detail['img'] = all_imgs[0]
    detail['imgs'] = all_imgs

    # Sizes.
    detail['sizes'] = self.get_sizes(pdata)

    # Product ID, color names and color ids, all keyed by Cod10.
    detail['productId'] = dict((c['Cod10'], c['Cod10'])
                               for c in pdata['Colors'])
    detail['color'] = dict((c['Cod10'], c['Name'])
                           for c in pdata['Colors'])
    detail['colorId'] = dict((c['Cod10'], c['Cod10'])
                             for c in pdata['Colors'])

    # Description and fabric both read the first info item.
    detail['descr'] = area('#item-infos li:first').remove('script').text()
    detail['fabric'] = area('#item-infos li:first').remove('script').text()

    # Returns: the last info item.
    detail['returns'] = area('#item-infos li:last').remove('script').text()

    # Designer.
    detail['designer'] = tc_var('product_author')

    # Keys.
    detail['keys'] = [c['Cod10'] for c in pdata['Colors']]

    # HTTP status, sale status, return URL (the requested URL here).
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    The first response is passed through ``self.resp_verify``; when the
    returned body asks for ``window.location.reload(true);`` the page is
    fetched a second time.  An empty body restarts the whole method.

    A 404 response, or an availability text containing ``Out of stock``,
    yields an off-shelf payload; any other non-200 status yields an error
    payload (both ``successful=False``).  Otherwise the collected product
    dict is returned with ``successful=True``.
    """
    # First load, passed through the response verification step.
    resp = self.session.get(url, verify=False)
    resp = self.resp_verify(resp)

    # Second load when the page asks the browser to reload itself.
    if 'window.location.reload(true);' in resp.text:
        resp = self.session.get(url, verify=False)

    # The site sometimes answers with an empty body: start over.  The
    # original retry passed verify=False, which detail() does not accept,
    # so it raised TypeError instead of retrying.
    # NOTE(review): the retry is unbounded; a server that keeps returning
    # empty bodies ends in a recursion error -- consider a retry cap.
    if not resp.text:
        return self.detail(url)

    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page is gone.
    if status_code == 404:
        return off_shelf()

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page says the product is out of stock.
    if 'Out of stock' in pqhtml('.product-availability').text():
        return off_shelf()

    # Preparation.
    area = pqhtml('div#product-view')
    pdata = self.get_pdata(area)

    detail = dict()

    # Brand is the part of the heading before the first '-'; the name is
    # the full heading.
    heading = area('.panel-a h1:first').text()
    detail['brand'] = heading.split('-')[0].strip()
    detail['name'] = heading

    # Currency.
    currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Product ID.
    product_id = pdata['productId']
    detail['productId'] = product_id

    # Prices, with thousands separators removed.
    detail['price'] = pdata['basePrice'].replace(',', '')
    detail['listPrice'] = pdata['oldPrice'].replace(',', '')

    # Color: the add-to-cart button's variant, or the configured default.
    variant = area('button#product-addtocart-button').attr('data-variant')
    detail['color'] = variant or self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = product_id

    # Images.
    imgs = [img.attr('data-src')
            for img in area('div#mobile-carousel-images a>img').items()]
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Sizes.
    detail['sizes'] = self.get_sizes(area, pdata)

    # Description: toggle text plus the first description section found
    # under the product view's parent.
    detail['descr'] = (area('div.tog-desc').text()
                       + area.parent()('.description-section:first').text())

    # HTTP status, sale status, final URL.
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = resp.url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response yields an off-shelf payload and any other non-200 status
    an error payload (both ``successful=False``).  Otherwise the collected
    product dict is returned with ``successful=True``.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    # Off the shelf: the page is gone.
    if status_code == 404:
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    area = pqhtml('#main #primary')

    detail = dict()

    # Product ID (with '#' removed); also reported as SKU and code.
    product_id = area('[itemprop="productID"]:first').text().replace('#', '')
    for field in ('productId', 'productSku', 'productCode'):
        detail[field] = product_id

    # Brand, and a name made of brand plus product name.
    brand = area('.brand-name:first').text()
    detail['brand'] = brand
    detail['name'] = ' '.join([brand, area('.product-name:first').text()])

    # Prices and currency come from the same helper.
    price, list_price, currency = self.get_all_price(area)
    detail['price'] = price
    detail['listPrice'] = list_price
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Description.
    detail['descr'] = area(
        '#pdpMain .product-detail .product-information').text()

    # Color: configured defaults.
    detail['color'] = self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Images.
    imgs = []
    for node in area('#pdpMain #product-col-2 img').items():
        imgs.append(node.attr('src'))
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Sizes.
    detail['sizes'] = self.get_sizes(area)

    # HTTP status, sale status, final URL.
    detail['status_code'] = status_code
    detail['status'] = self.cfg.STATUS_SALE
    detail['backUrl'] = resp.url

    # Peer address of the response, when present.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join([str(part) for part in peer])

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Scrape the product page at *url* and wrap the result with ``tool.return_data``.

    A 404 response, or product data without orderable variants, yields an
    off-shelf payload; any other non-200 status yields an error payload
    (both ``successful=False``).  Otherwise the collected product dict is
    returned with ``successful=True``.

    The list price is the first number found in the ``.pricenotebucket``
    text; when that text is empty or contains no number, the sale price
    is used instead.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Every early exit logs the page title together with the URL.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        log_page()
        payload = tool.get_off_shelf(code=status_code,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page is gone.
    if status_code == 404:
        return off_shelf()

    # Any other unexpected status.
    if status_code != 200:
        log_page()
        payload = tool.get_error(code=status_code,
                                 message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    area = pqhtml('#container')
    domain = tool.get_domain(url)
    pdata = self.get_pdata(pqhtml, domain)

    # Off the shelf: nothing can be ordered.
    if not pdata['hasOrderableVariants']:
        return off_shelf()

    detail = dict()

    # Brand and name.
    meta = area('.product-meta')
    detail['brand'] = meta.attr('data-brand')
    detail['name'] = meta.attr('data-productname')

    # Currency: three-letter code assigned in the page scripts.
    currency = re.search(r's\["currencyCode"\]="(\w{3})";',
                         pqhtml('script').text()).group(1)
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Sale price and sizes.
    price, sizes = self.get_info(pdata)
    detail['price'] = price

    # List price.  The previous pattern had no capture group, so
    # .groups()[0] raised IndexError whenever the note had any text; it
    # also covered only two characters of the number.
    note_text = area('.pricenotebucket').text()
    number = re.search(r'\d+(?:\.\d+)?', note_text) if note_text else None
    detail['listPrice'] = number.group(0) if number else price

    # Colors; get_color also provides the status and the images.
    status, color, imgs = self.get_color(pdata)
    detail['color'] = color
    detail['colorId'] = dict((key, key) for key in color.keys())

    # Keys.
    detail['keys'] = color.keys()

    # Images: a list gives one lead image, a mapping gives one per color.
    if isinstance(imgs, list):
        detail['img'] = imgs[0]
    else:
        detail['img'] = dict((cid, group[0]) for cid, group in imgs.items())
    detail['imgs'] = imgs

    # Product ID.
    detail['productId'] = meta.attr('data-pid')

    # Sizes.
    detail['sizes'] = sizes

    # Description and details.
    detail['descr'] = area('section.product-details .longdescription').text()
    detail['detail'] = area('section.product-details').text()

    # HTTP status, status from get_color, final URL.
    detail['status_code'] = status_code
    detail['status'] = status
    detail['backUrl'] = resp.url

    self.logger.info(json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url)))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a product page and build its detail record.

    A 404 response or a page without a ``#goodsInfo`` element produces an
    off-shelf result; any other non-200 status produces an error result.

    :param url: product page URL.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        # Build the "sold out / removed" response.
        log_page()
        data = tool.get_off_shelf(code=status_code,
                                  message=self.cfg.SOLD_OUT,
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return off_shelf()

    # Any other non-200 status is reported as an error.
    if status_code != 200:
        log_page()
        data = tool.get_error(code=status_code,
                              message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                              backUrl=resp.url,
                              html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Preparation.
    area = pqhtml('#goodsInfo')
    # NOTE(review): the result is unused below; call kept as in the original.
    domain = tool.get_domain(url)

    # Off the shelf: the product container is missing.
    if not area:
        return off_shelf()

    detail = dict()

    # Product ID
    productId = pqhtml('#goodsForm input#bskGodGodNo').attr('value')
    detail['productId'] = productId
    detail['productSku'] = productId
    detail['productCode'] = area('.prd-code').text()

    # Brand
    brand = pqhtml('#goodsForm input#brndNm').attr('value')
    detail['brand'] = brand

    # Name
    detail['name'] = u'{0} {1}'.format(
        brand, pqhtml('#goodsForm input#godNm').attr('value'))

    # Currency and prices
    currency, price, listPrice = self.get_currency_prices(pqhtml, area)
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Description
    meta_descr = pqhtml('meta[name="description"]').attr('content')
    detail['descr'] = meta_descr

    # Details. ``attr`` yields None when the meta tag is absent, which used
    # to raise TypeError on concatenation; fall back to an empty prefix.
    detail['detail'] = (meta_descr or '') + area('.desc-area').text()

    # Colour.
    # NOTE(review): the get_color result is unused (defaults are stored
    # instead); call kept in case it has side effects.
    color = self.get_color(area)
    detail['color'] = self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Images
    imgs = [img.attr('src')
            for img in pqhtml('#prdImgWrap .prdImg ul>li>img').items()]
    detail['img'] = pqhtml(
        'meta[property="og:image"][name="og_image"]').attr('content')
    detail['imgs'] = imgs

    # Sizes
    detail['sizes'] = self.get_sizes(area)

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL after redirects
    detail['backUrl'] = resp.url

    # Remote IP and port, when the underlying response exposes ``peer``.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(map(str, peer))

    log_info = json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url))
    self.logger.info(log_info)

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a k11kuriosity product page and build its detail record.

    Result classification, in order:

    * 404 -> off shelf;
    * any other non-200 status -> error;
    * no ``#buy`` element, or no ``body .right`` area -> off shelf.

    The non-200 check now runs before the ``#buy`` check. Previously an error
    page without the button was reported as sold out instead of as a fetch
    error.

    :param url: product page URL; must contain ``goods/<digits>``.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    :raises AttributeError: if ``url`` contains no ``goods/<digits>`` part.
    :raises IndexError: if the page has no ``img.small`` images.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')
    add_tocart = pqhtml('#buy')

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        # Build the "sold out / removed" response.
        log_page()
        data = tool.get_off_shelf(code=status_code,
                                  message=self.cfg.SOLD_OUT,
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return off_shelf()

    # Any other non-200 status is reported as an error.
    if status_code != 200:
        log_page()
        data = tool.get_error(code=status_code,
                              message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                              backUrl=resp.url,
                              html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: a normal page without an add-to-cart button.
    if not add_tocart:
        return off_shelf()

    # Preparation.
    img_area = pqhtml('body div.left')
    prod_area = pqhtml('body .right')

    # Off the shelf: the product area is missing.
    if not prod_area:
        return off_shelf()

    detail = dict()

    # Product ID, taken from the URL.
    productId = re.search(r'goods\/(\d+)[\/]?', url).groups()[0]
    detail['productId'] = productId
    detail['productSku'] = productId
    detail['productCode'] = productId

    # Brand: last paragraph with the "enter brand" link text removed.
    brand = prod_area('p:last').text().replace(u'进入品牌', '').strip()
    detail['brand'] = brand

    # Name: the element just before #kuriosity_code.
    detail['name'] = prod_area('#kuriosity_code').prev().text()

    # Currency is fixed for this site.
    currency = 'CNY'
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices
    price, listPrice = self.get_all_price(prod_area)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Returns
    detail['returns'] = ''

    # Description. The last div of the image area (after-sales notice) is
    # emptied first so it does not leak into the text read below.
    img_area('div:last').empty()
    detail['descr'] = prod_area('.text').text() + img_area(
        'div:first').text()

    # Colour
    detail['color'] = self.cfg.DEFAULT_ONE_COLOR
    detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Images: ``src`` values are prefixed with the site origin.
    imgs = [
        'https://www.k11kuriosity.com' + img.attr('src')
        for img in img_area('img.small').items()
    ]
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Sizes
    detail['sizes'] = self.get_sizes(prod_area)

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL after redirects
    detail['backUrl'] = resp.url

    # Remote IP and port, when the underlying response exposes ``peer``.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(map(str, peer))

    log_info = json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url))
    self.logger.info(log_info)

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a REISS product page and build its detail record.

    A 404 response, a page without the product grid, or a product with no
    selectable sizes produces an off-shelf result; any other non-200 status
    produces an error result.

    :param url: product page URL.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    :raises IndexError: if the gallery contains no images.
    """
    resp = self.session.get(url, verify=False)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        # Build the "sold out / removed" response.
        log_page()
        data = tool.get_off_shelf(code=status_code,
                                  message=self.cfg.SOLD_OUT,
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return off_shelf()

    if status_code != 200:
        log_page()
        data = tool.get_error(code=status_code,
                              message='status_code Error',
                              backUrl=resp.url,
                              html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    area = pqhtml('.container-full--small-only .grid')

    # Off the shelf: the product grid is missing.
    if not area:
        return off_shelf()

    detail = dict()

    # Name: title plus both description fragments.
    detail['name'] = area('h1.product__title').text() + ' ' + area(
        'h2.product__desc').text() + ' ' + area(
            'span.product__desc').text()

    # Colour
    detail['color'] = area('span[itemprop="color"]').text()

    # Images: prefer the lazy-load URL, fall back to ``src``.
    imgsTmp = [
        img.attr('data-lazy') or img.attr('src')
        for img in area('.product-gallery__imgholder a img').items()
    ]
    detail['img'] = imgsTmp[0]
    detail['imgs'] = imgsTmp

    # Currency
    currency = area('meta[itemprop="priceCurrency"]').attr('content')
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Current price
    price = area('meta[itemprop="price"]').attr('content')
    detail['price'] = price

    # List price: number in the "old price" element, thousands separators
    # removed. Falls back to the current price when the element is absent or
    # contains no number.
    listPriceBlock = area('span.product__price--old')
    list_price_match = None
    if len(listPriceBlock):
        list_price_match = re.search(r'(\d[\.\d,]*)', listPriceBlock.text())
    if list_price_match:
        detail['listPrice'] = list_price_match.group(1).replace(',', '')
    else:
        detail['listPrice'] = price

    productInfo = area('#product-info')

    # Description
    detail['descr'] = productInfo('#design').text()

    # Brand is fixed for this site.
    detail['brand'] = 'REISS'

    # Product ID
    productId = area('span[itemprop="productID"]').text()
    detail['productId'] = productId

    # Colour ID
    detail['colorId'] = productId

    # Delivery and returns are read from the same tab.
    detail['delivery'] = productInfo('#delivery').text()
    detail['returns'] = productInfo('#delivery').text()

    # Design
    detail['designer'] = productInfo('#design').text()

    # Size and fit
    detail['sizeFit'] = productInfo('#size').text()

    # Fabric
    detail['fabric'] = productInfo('#care').text()

    # Sizes. Items without an <input> are skipped. An item carrying the
    # ``size_not_available`` class gets inventory 0. The previous exact
    # comparison of the whole class attribute could never match, because
    # every selected item also carries ``product-sizes__item``.
    sizes = []
    size_items = area(
        'form .product-attributes .product-sizes .product-sizes__item')
    for opt in size_items.items():
        size_input = opt('input')
        if not len(size_input):
            continue
        if opt.hasClass('size_not_available'):
            inventory = 0
        else:
            inventory = self.cfg.DEFAULT_STOCK_NUMBER
        sizes.append(dict(name=opt.text(),
                          sku=size_input.attr('value'),
                          id=size_input.attr('value'),
                          inventory=inventory))
    detail['sizes'] = sizes

    # Off the shelf: no sizes at all.
    if not detail['sizes']:
        return off_shelf()

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL after redirects
    detail['backUrl'] = resp.url

    log_info = json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url))
    self.logger.info(log_info)

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Look up a Gilt product through the Gilt API and build its detail record.

    The numeric product id is extracted from ``url`` and the product detail
    endpoint is queried with ``self.cfg.api_key``. A 404 response produces an
    off-shelf result; any other non-200 status produces an error result.

    :param url: product page URL containing ``product/<digits>-...``.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    :raises AttributeError: if ``url`` contains no product id.
    :raises ValueError: if the API response has no ``420x560`` image set.
    """
    product_id = re.search(r'product/(\d+)-\w', url).groups()[0]

    api = 'https://api.gilt.com/v1/products/{product_id}/detail.json'
    params = dict(apikey=self.cfg.api_key)

    # From here on ``url`` is the API endpoint, not the page URL.
    url = api.format(product_id=product_id)

    resp = requests.get(url, params=params)

    # Unicode template: a byte-string template would implicitly ASCII-encode
    # the response text under Python 2 and fail on non-ASCII content.
    self.logger.debug(u'gilt product id:{0}, api response:{1}'.format(
        product_id, resp.text))

    if resp.status_code == 404:
        data = tool.get_off_shelf(code=resp.status_code,
                                  message=self.cfg.SOLD_OUT,
                                  backUrl=resp.url,
                                  html=resp.text)
        return tool.return_data(successful=False, data=data)

    if resp.status_code != 200:
        data = tool.get_error(
            code=resp.status_code,
            message='gilt api status_code error:{0}'.format(
                resp.status_code),
            backUrl=resp.url,
            html=resp.text)
        return tool.return_data(successful=False, data=data)

    product_detail = resp.json()

    detail = dict()

    # Product ID
    productId = product_detail['id']
    detail['productId'] = productId

    # Brand
    brand = product_detail['brand']
    detail['brand'] = brand

    # Name (unicode template, same reason as the debug message above).
    detail['name'] = u'{0} {1}'.format(brand, product_detail['name'])

    # Currency: the API returns none, so USD is used.
    currency = 'USD'
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Prices, colour and sizes
    price, listPrice, color, sizes = self.get_info(product_detail)
    detail['price'] = price
    detail['listPrice'] = listPrice

    # Colour
    detail['color'] = color
    detail['colorId'] = productId

    # Images: only the 420x560 rendition is used.
    image_urls = product_detail['image_urls']
    if '420x560' not in image_urls:
        raise ValueError('get 420x560 imgs fail')
    imgs = [img_url['url'] for img_url in image_urls['420x560']]
    detail['img'] = imgs[0]
    detail['imgs'] = imgs

    # Sizes
    detail['sizes'] = sizes

    # Description: all content values joined, or a placeholder when empty.
    detail['descr'] = ' '.join(
        product_detail['content'].values()) or 'no descr'

    # Returns
    detail['returns'] = ''

    # HTTP status code (only reached on a 200 response)
    detail['status_code'] = 200

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # API endpoint that was queried (without query parameters)
    detail['backUrl'] = url

    # Remote IP and port, when the underlying response exposes ``peer``.
    peer = resp.raw._original_response.peer
    if peer:
        detail['ip_port'] = ':'.join(map(str, peer))

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a product page and build its detail record.

    A 404 response, or a first ``.stock_info`` element containing
    ``Sold Out``, produces an off-shelf result; any other non-200 status
    produces an error result.

    :param url: product page URL.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    """
    response = self.session.get(url, verify=False)
    http_status = response.status_code
    page = PyQuery(response.text or 'nothing')

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        entry = json.dumps(
            dict(time=time.time(), title=page('title').text(), url=url))
        self.logger.info(entry)

    def sold_out():
        # Build the "sold out / removed" response.
        log_page()
        payload = tool.get_off_shelf(code=http_status,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=response.url,
                                     html=page.outerHtml())
        return tool.return_data(successful=False, data=payload)

    def fetch_error():
        # Build the response for an unexpected HTTP status.
        log_page()
        payload = tool.get_error(
            code=http_status,
            message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
            backUrl=response.url,
            html=page.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Off the shelf: the page no longer exists.
    if http_status == 404:
        return sold_out()

    # Any other non-200 status is reported as an error.
    if http_status != 200:
        return fetch_error()

    # Preparation.
    content = page('.fwd_page .fwd_content')

    # Off the shelf: the stock banner says so.
    if 'Sold Out' in content('.stock_info:first').text():
        return sold_out()

    record = dict()

    # Brand: two page layouts are tried in turn.
    brand_name = content(
        '.product_info:first .designer_brand:first a:first').text()
    if not brand_name:
        brand_name = content(
            '.product_info:first .product-titles__brand a:first').text()
    record['brand'] = brand_name

    # Name: brand followed by the h2 (or, failing that, h1) product name.
    product_name = content(
        '.product_info:first h2.product_name:first').text()
    if not product_name:
        product_name = content(
            '.product_info:first h1.product_name:first').text()
    record['name'] = brand_name + ' ' + product_name

    # Currency
    currency_code = page('meta[itemprop="priceCurrency"]').attr('content')
    record['currency'] = currency_code
    record['currencySymbol'] = tool.get_unit(currency_code)

    # Prices
    current_price, original_price = self.get_all_price(
        content('.eagle .prices'))
    record['price'] = current_price
    record['listPrice'] = original_price

    # Product ID
    product_code = content('button.addtobag').attr('data-code')
    record['productId'] = product_code

    # Colour: single-colour label, else the first dropdown option.
    colour_name = content('.color_dd .one_sizeonly').text()
    if not colour_name:
        colour_name = content('.color_dd option:first').text()
    record['color'] = colour_name
    record['colorId'] = product_code

    # Images
    zoom_images = []
    for node in content(
            '.cycle-slideshow .product-detail-image-zoom img').items():
        zoom_images.append(node.attr('data-zoom-image'))
    record['img'] = zoom_images[0]
    record['imgs'] = zoom_images

    # Sizes
    record['sizes'] = self.get_sizes(content)

    # Description
    record['descr'] = content('#details').text()

    # Brand description
    record['brandDescr'] = content('#aboutdesigner').text()

    # Returns
    record['returns'] = content('#free_ship_popup').text()

    # HTTP status code
    record['status_code'] = http_status

    # Status
    record['status'] = self.cfg.STATUS_SALE

    # Final URL after redirects
    record['backUrl'] = response.url

    summary = json.dumps(
        dict(time=time.time(),
             productId=record['productId'],
             name=record['name'],
             currency=record['currency'],
             price=record['price'],
             listPrice=record['listPrice'],
             url=url))
    self.logger.info(summary)

    return tool.return_data(successful=True, data=record)
def detail(self, url):
    """Fetch a product page and build its detail record.

    Most fields are read from ``span[itemprop=...]`` elements of the whole
    page; the rest come from the ``div.primary-content`` area. A 404 response
    produces an off-shelf result and any other non-200 status an error
    result.

    :param url: product page URL.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    """
    response = self.session.get(url, verify=False)
    http_status = response.status_code
    page = PyQuery(response.text or 'nothing')

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        entry = json.dumps(
            dict(time=time.time(), title=page('title').text(), url=url))
        self.logger.info(entry)

    def itemprop(prop):
        # ``content`` attribute of the span carrying the given itemprop.
        return page('span[itemprop="%s"]' % prop).attr('content')

    # Off the shelf: the page no longer exists.
    if http_status == 404:
        log_page()
        payload = tool.get_off_shelf(code=http_status,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=response.url,
                                     html=page.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Any other non-200 status is reported as an error.
    if http_status != 200:
        log_page()
        payload = tool.get_error(
            code=http_status,
            message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
            backUrl=response.url,
            html=page.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    content = page('div.primary-content')
    # NOTE(review): the domain is computed but not used afterwards.
    domain = tool.get_domain(url)

    record = dict()

    # Product ID doubles as SKU and product code.
    product_code = itemprop('productID')
    record['productId'] = product_code
    record['productSku'] = product_code
    record['productCode'] = product_code

    # Brand
    record['brand'] = itemprop('brand')

    # Name
    record['name'] = itemprop('name')

    # Currency
    currency_code = itemprop('priceCurrency')
    record['currency'] = currency_code
    record['currencySymbol'] = tool.get_unit(currency_code)

    # Prices: only the list price from get_all_price is kept; the current
    # price is taken from the itemprop span instead.
    _unused_price, original_price = self.get_all_price(content)
    record['price'] = itemprop('price')
    record['listPrice'] = original_price

    # Category
    record['category'] = content(
        'a[data-bigpopup="sizeChart"]').attr('data-category')

    # Sub-category
    record['subcategory'] = content(
        'a[data-bigpopup="sizeChart"]').attr('data-sub-category')

    # Description
    record['descr'] = itemprop('description')

    # Details
    record['detail'] = content('#collapseOne').text()

    # Returns
    record['returns'] = content('#collapseFive').text()

    # Colour
    record['color'] = itemprop('color')
    record['colorId'] = self.cfg.DEFAULT_COLOR_SKU

    # Images
    carousel = []
    for node in content(
            '.product-image-carousel img.primary-image').items():
        carousel.append(node.attr('src'))
    record['img'] = itemprop('image')
    record['imgs'] = carousel

    # Sizes
    record['sizes'] = self.get_sizes(content)

    # HTTP status code
    record['status_code'] = http_status

    # Status
    record['status'] = self.cfg.STATUS_SALE

    # Final URL after redirects
    record['backUrl'] = response.url

    # Remote IP and port, when the underlying response exposes ``peer``.
    if response.raw._original_response.peer:
        record['ip_port'] = ':'.join(
            str(part) for part in response.raw._original_response.peer)

    summary = json.dumps(
        dict(time=time.time(),
             productId=record['productId'],
             name=record['name'],
             currency=record['currency'],
             price=record['price'],
             listPrice=record['listPrice'],
             url=url))
    self.logger.info(summary)

    return tool.return_data(successful=True, data=record)
def detail(self, url):
    """Fetch a product page and build a per-look (per-colour) detail record.

    Product data is parsed from the page's inline scripts. A 404 response,
    or scripts that do not mention ``productDetails``, produce an off-shelf
    result; any other non-200 status produces an error result.

    In the returned detail dict, ``color``, ``colorId``, ``img``, ``imgs``,
    ``sizes``, ``price`` and ``listPrice`` are mappings keyed by look id.

    :param url: product page URL.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    """
    resp = self.session.get(url, timeout=self.cfg.REQUEST_TIME_OUT)
    status_code = resp.status_code
    pqhtml = PyQuery(resp.text or 'nothing')

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

    def off_shelf():
        # Build the "sold out / removed" response.
        log_page()
        data = tool.get_off_shelf(code=status_code,
                                  message=self.cfg.SOLD_OUT,
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    # Off the shelf: the page no longer exists.
    if status_code == 404:
        return off_shelf()

    if status_code != 200:
        log_page()
        data = tool.get_error(code=status_code,
                              message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                              backUrl=resp.url,
                              html=pqhtml.outerHtml())
        return tool.return_data(successful=False, data=data)

    Jtxt = pqhtml('script').text()

    # Off the shelf: the scripts carry no product data.
    if 'productDetails' not in Jtxt:
        return off_shelf()

    pdata = self.get_pdata(Jtxt)

    # Preparation.
    product = pdata['product']
    allLooks = product['allLooks']
    skuJournal = self.get_skuJournal(Jtxt)

    def attribute_or_default(key, default_value):
        # Attribute block from the product, or a single-value placeholder.
        if key in product:
            return product[key]
        return {'values': [{'id': 0, 'value': default_value}]}

    sizeAttribute = attribute_or_default('sizeAttribute',
                                         self.cfg.DEFAULT_ONE_SIZE)
    colorAttribute = attribute_or_default('colorAttribute',
                                          self.cfg.DEFAULT_ONE_COLOR)

    # look id -> image URLs (protocol-relative URLs get an http: prefix)
    lookId2ImgArr = dict(
        (look['productLookId'],
         ['http:' + img['retinaQuickViewLookUrl'] for img in look['images']])
        for look in allLooks)

    # look id -> current price (prices can differ per colour)
    lookId2Price = dict(
        (look['productLookId'], look['pricing']['maxSkuSalePrice']['raw'])
        for look in allLooks)

    # look id -> list price (prices can differ per colour)
    lookId2ListPrice = dict(
        (look['productLookId'], look['pricing']['maxSkuMsrpPrice']['raw'])
        for look in allLooks)

    # look id -> sku ids
    lookId2SkuArr = dict(
        (look['productLookId'], [sku['skuId'] for sku in look['skus']])
        for look in allLooks)

    # size id -> size name, e.g. {2000: u's', 2001: u'm'}
    sizeId2Name = dict(
        (size['id'], size['value']) for size in sizeAttribute['values'])

    # colour id -> colour name, e.g. {1000: u'dark red'}
    colorId2Name = dict(
        (color['id'], color['value']) for color in colorAttribute['values'])

    # sku -> inventory for skus in stock
    sku2Inventory = self.get_sku2Inventory(skuJournal)

    entries = skuJournal['entries']

    # sku -> inventory for skus whose status is X or U. The previous test
    # compared the status with the list ['X', 'U'] for equality, which never
    # matched, so this mapping was always empty.
    sku2NoInventory = dict(
        (entry['skuId'], entry['numberUnitsForSale'])
        for entry in entries
        if entry['type'] == 'inventory' and entry['status'] in ('X', 'U'))

    # Merge the out-of-stock entries into the inventory mapping.
    sku2Inventory.update(sku2NoInventory)

    # sku -> current price (prices can differ per size)
    sku2Price = dict(
        (entry['skuId'], str(entry['salePrice']['raw']))
        for entry in entries if entry['type'] == 'pricing')

    # sku -> list price (prices can differ per size)
    sku2ListPrice = dict(
        (entry['skuId'], str(entry['msrpPrice']['raw']))
        for entry in entries if entry['type'] == 'pricing')

    # sku id -> size id
    skuId2SizeId = dict(
        (entry['skuId'], entry['savId'])
        for entry in entries
        if entry['type'] == 'associate' and entry['attribute'] == 'Size')

    # sku id -> colour id
    skuId2ColorId = dict(
        (entry['skuId'], entry['savId'])
        for entry in entries
        if entry['type'] == 'associate' and entry['attribute'] == 'Color')

    # sku -> size name
    sku2SizeName = self.get_sku2SizeName(product, skuId2SizeId, sizeId2Name)

    # sku -> colour name
    sku2ColorName = self.get_sku2ColorName(product, skuId2ColorId,
                                           colorId2Name)

    # look id -> colour id
    lookId2ColorId = self.get_lookIe2ColorId(lookId2SkuArr, skuId2ColorId)

    # look id -> colour name
    lookId2ColorName = self.get_lookIe2ColorName(lookId2SkuArr,
                                                 sku2ColorName)

    # look id -> sizes
    lookId2Sizes = self.get_lookId2Sizes(lookId2SkuArr, sku2SizeName,
                                         sku2Inventory, sku2Price,
                                         sku2ListPrice)

    detail = dict()

    # Keys: every look on the page, not only the one named in the URL.
    detail['keys'] = lookId2SkuArr.keys()

    # Colour
    detail['color'] = lookId2ColorName
    detail['colorId'] = lookId2ColorId

    # Product ID
    detail['productId'] = product['productId']

    # Images: first image of every look, plus the full lists.
    detail['img'] = dict(
        (lookId, imgArr[0]) for lookId, imgArr in lookId2ImgArr.items())
    detail['imgs'] = lookId2ImgArr

    # Sizes
    detail['sizes'] = lookId2Sizes

    # Prices
    detail['price'] = lookId2Price
    detail['listPrice'] = lookId2ListPrice

    # Brand
    brand = pdata['brand']['name']
    detail['brand'] = brand

    # Name
    detail['name'] = brand + ' ' + pdata['product']['name']

    # Currency
    currency = pdata['defaultLook']['pricing']['currencyCode']
    detail['currency'] = currency
    detail['currencySymbol'] = tool.get_unit(currency)

    # Returns
    detail['returns'] = pdata['returnPolicy']['description']

    # Description: the HTML fragment with <strong> elements removed.
    dtxt = PyQuery(pdata['product']['description'])
    dtxt.remove('strong')
    detail['descr'] = dtxt.text()

    # HTTP status code
    detail['status_code'] = status_code

    # Status
    detail['status'] = self.cfg.STATUS_SALE

    # Final URL after redirects
    detail['backUrl'] = resp.url

    log_info = json.dumps(
        dict(time=time.time(),
             productId=detail['productId'],
             name=detail['name'],
             currency=detail['currency'],
             price=detail['price'],
             listPrice=detail['listPrice'],
             url=url))
    self.logger.info(log_info)

    return tool.return_data(successful=True, data=detail)
def detail(self, url):
    """Fetch a product page and build its single-colour, single-size record.

    Name, prices, currency, id and stock come from the data parsed out of the
    page scripts; the remaining fields are read from the ``#container``
    area. A 404 response produces an off-shelf result and any other non-200
    status an error result.

    :param url: product page URL.
    :returns: whatever ``tool.return_data`` returns -- called with
        ``successful=True`` and the detail dict on success, or with
        ``successful=False`` and the off-shelf/error payload otherwise.
    """
    response = self.session.get(url, verify=False)
    http_status = response.status_code
    page = PyQuery(response.text or 'nothing')
    site_root = tool.get_domain(url)

    def log_page():
        # Log timestamp, page title and URL of the rejected page.
        entry = json.dumps(
            dict(time=time.time(), title=page('title').text(), url=url))
        self.logger.info(entry)

    # Off the shelf: the page no longer exists.
    if http_status == 404:
        log_page()
        payload = tool.get_off_shelf(code=http_status,
                                     message=self.cfg.SOLD_OUT,
                                     backUrl=response.url,
                                     html=page.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Any other non-200 status is reported as an error.
    if http_status != 200:
        log_page()
        payload = tool.get_error(
            code=http_status,
            message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
            backUrl=response.url,
            html=page.outerHtml())
        return tool.return_data(successful=False, data=payload)

    # Preparation.
    script_text = page('script').text()
    container = page('#container')
    pdata = self.get_pdata(script_text)
    # NOTE(review): the domain is computed a second time here, as in the
    # original; kept so the sequence of calls is unchanged.
    site_root = tool.get_domain(url)

    record = dict()

    # Images: link targets prefixed with the site domain.
    gallery = []
    for link in container(
            'form#addToCart ul.alt_imgs:first>li>a').items():
        gallery.append(site_root + link.attr('href'))
    record['img'] = gallery[0]
    record['imgs'] = gallery

    product = pdata['product']

    # Name
    record['name'] = product['name']

    # Brand
    record['brand'] = container('form#addToCart a#sameBrandProduct').text()

    # Prices
    record['price'] = product['unit_sale_price']
    record['listPrice'] = product['unit_price']

    # Currency
    currency_code = product['currency']
    record['currency'] = currency_code
    record['currencySymbol'] = tool.get_unit(currency_code)

    # Product ID
    product_code = product['id']
    record['productId'] = product_code

    # Colour
    record['color'] = self.cfg.DEFAULT_ONE_COLOR
    record['colorId'] = product_code

    # Sizes: a single placeholder size carrying the product stock.
    only_size = dict(name=self.cfg.DEFAULT_ONE_SIZE,
                     inventory=product['stock'],
                     sku=product_code)
    record['sizes'] = [only_size]

    # Description: short description followed by the first info tab.
    short_text = container('.prod_desc').text()
    tab_text = container('div#info_tabs>div.wrap>div#tab1_info').text()
    record['descr'] = short_text + ' ' + tab_text

    # Details
    record['detail'] = container('#tab1_info').text()

    # Brand description
    record['brandDescr'] = container('#tab2_info').text()

    # Warranty note
    record['note'] = container('#tab5_info').text()

    # Delivery
    record['delivery'] = container('#shippingData').text()

    # HTTP status code
    record['status_code'] = http_status

    # Status
    record['status'] = self.cfg.STATUS_SALE

    # Requested URL (not the post-redirect URL)
    record['backUrl'] = url

    summary = json.dumps(
        dict(time=time.time(),
             productId=record['productId'],
             name=record['name'],
             currency=record['currency'],
             price=record['price'],
             listPrice=record['listPrice'],
             url=url))
    self.logger.info(summary)

    return tool.return_data(successful=True, data=record)