Пример #1
0
    def is_ok_status_code(self, status_code, pqhtml, url, resp):
        """Check the HTTP status of a fetched page.

        Args:
            status_code: HTTP status code of the response.
            pqhtml: PyQuery document built from the response body.
            url: URL that was requested (only used in the log entry).
            resp: Response object; its ``url`` is reported as ``backUrl``.

        Returns:
            ``(True, '')`` for a 200 response. Otherwise ``(False, payload)``
            where ``payload`` is ``tool.return_data(successful=False, ...)``
            wrapping an off-shelf record (404) or an error record (any other
            status).
        """
        if status_code == 200:
            return True, ''

        # Every non-200 response is logged the same way before being reported.
        self.logger.info(json.dumps(
            dict(time=time.time(), title=pqhtml('title').text(), url=url)))

        if status_code == 404:
            # Off shelf: the page no longer exists.
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
        else:
            # Any other status is reported as a generic fetch error.
            payload = tool.get_error(code=status_code,
                                     message=self.cfg.GET_ERR.get(
                                         'SCERR', 'ERROR'),
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())

        return False, tool.return_data(successful=False, data=payload)
Пример #2
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Args:
            url: Product page URL, requested through ``self.session``.

        Returns:
            The value of ``tool.return_data``: ``successful=False`` with an
            off-shelf or error payload when the response is a 404, any other
            non-200 status, or the page is not marked as in stock; otherwise
            ``successful=True`` with the assembled ``detail`` dict.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log entry shared by every early-exit path.
            self.logger.info(json.dumps(dict(
                time=time.time(), title=pqhtml('title').text(), url=url)))

        def off_shelf():
            # Log the page, then build the "taken off the shelf" response.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        # No debug dump of ``area`` here: outerHtml() is None for an empty
        # selection, so dumping it would fail before the stock check below.
        area = pqhtml('.body-wrap .primary-wrap .product-area')
        siteObj = self.get_siteObj(pqhtml)

        # Off shelf: not flagged as in stock, or explicitly sold out.
        availability = area('meta[itemprop="availability"]').attr('content')
        if ('InStock' != availability
                or 'sold out' in area('.availability').text().lower()):
            return off_shelf()

        detail = dict()

        # Product ID
        productId = area(
            'input[class="buy"][name="buy"][type="hidden"]'
        ).attr('value') or self.get_product_id(siteObj)
        detail['productId'] = productId

        # Brand
        detail['brand'] = self.get_brand(siteObj)

        # Name
        detail['name'] = area('.product-title').text()

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Colour, image and size information
        if area('.variation-dropdowns'):
            img, imgs, color, sizes = self.get_color_img_size(area, productId)
            detail['keys'] = color.keys()
        else:
            # Single-variant product: fall back to the configured defaults.
            img = area('.main-product-image a').attr('href')
            imgs = [
                link.attr('href').strip().replace('/300/300/', '/600/600/')
                for link in area('ul.product-thumbnails li a').items()
            ]
            color = self.cfg.DEFAULT_ONE_COLOR
            sizes = [dict(name=self.cfg.DEFAULT_ONE_SIZE,
                          id=productId,
                          sku=productId,
                          inventory=self.cfg.DEFAULT_STOCK_NUMBER)]

        # Colour
        detail['color'] = color
        if isinstance(color, dict):
            detail['colorId'] = {cid: cid for cid in color}
        else:
            detail['colorId'] = productId

        # Images
        detail['img'] = img
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = sizes

        # Description
        detail['descr'] = (area('div[itemprop="description"]').text()
                           + area('div.product-more-details').text())

        # Details
        detail['detail'] = area('div.product-more-details').text()

        # Returns policy
        detail['returns'] = area('.product-delivery-returns').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # IP and port the response came from, when available.
        # NOTE(review): ``peer`` is not a standard attribute of the raw
        # response; presumably it is patched in elsewhere -- confirm.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        self.logger.info(json.dumps(dict(time=time.time(),
                                         productId=detail['productId'],
                                         name=detail['name'],
                                         currency=detail['currency'],
                                         price=detail['price'],
                                         listPrice=detail['listPrice'],
                                         url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #3
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Args:
            url: Product page URL, requested through ``self.session``.

        Returns:
            The value of ``tool.return_data``: ``successful=False`` with an
            off-shelf payload (404, sold-out button text, or sizes reported
            as ``'sold out'``) or an error payload (other non-200 status);
            otherwise ``successful=True`` with the assembled ``detail`` dict.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log entry shared by every early-exit path.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        def off_shelf():
            # Log the page, then build the off-shelf response. All off-shelf
            # exits report the sold-out message rather than the generic
            # fetch-error message.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Strip scripts and styles so later .text() calls only see content.
        pqhtml.remove('script').remove('style')

        area = pqhtml('div#product-summary')

        # Off shelf: the add-to-cart button says "sold out".
        button_text = area('#product-form .add-button').text().lower()
        if u'售罄' in button_text or u'sold out' in button_text:
            return off_shelf()

        detail = dict()

        # All images
        imgs = self.get_imgs(pqhtml)
        detail['imgs'] = imgs
        detail['img'] = imgs[0]

        # Name
        detail['name'] = (area('h1.brand').text() + ' '
                          + area('.name').text())

        # Currency: first whitespace-separated token of the regular price.
        currency = area('span.regular-price').text().split()[0]
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        color, sizes = self.get_sizes(area)

        # Colour
        detail['color'] = color

        # Sizes
        detail['sizes'] = sizes

        # Off shelf: get_sizes signals it with the string 'sold out'.
        if isinstance(sizes, basestring) and sizes == 'sold out':
            return off_shelf()

        # Description
        detail['descr'] = area('div#description').text() or pqhtml(
            '#product-details .product-details-section').text()

        # Brand
        detail['brand'] = area('h1.brand').text()

        # Product ID
        prodId = area.attr('data-id')
        detail['productId'] = prodId
        detail['colorId'] = prodId

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # IP and port the response came from, when available.
        # NOTE(review): ``peer`` is not a standard attribute of the raw
        # response; presumably it is patched in elsewhere -- confirm.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #4
0
    def detail(self, url):
        """Fetch a product page, retrying on the known tag-data failure.

        A failure whose message contains ``'get YinTai_TagData Fail'`` is
        retried up to 10 times. Once the budget is used up, the next failure
        is re-raised as ``ValueError``. The retry counter ``self._retry`` is
        reset after a successful fetch and after giving up, so one bad URL
        does not reduce the retries available to later calls.

        Args:
            url: Product page URL.

        Returns:
            The value of ``tool.return_data`` (see ``_detail_once``).

        Raises:
            ValueError: When the retry budget is exhausted, or when the page
                does not carry exactly one product.
        """
        max_retries = 10

        try:
            result = self._detail_once(url)
        except Exception as e:
            if self._retry >= max_retries:
                # Reset so the next call starts with a fresh budget.
                self._retry = 0
                raise ValueError('yintai retry {0} times ,{1}'.format(
                    max_retries, str(e)))
            if 'get YinTai_TagData Fail' in str(e):
                self._retry += 1
                return self.detail(url)
            raise

        self._retry = 0
        return result

    def _detail_once(self, url):
        """Perform a single fetch-and-parse attempt for ``detail``."""
        resp = requests.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log entry shared by every early-exit path.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        def off_shelf():
            # Log the page, then build the off-shelf response.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#bd .grid')
        pdata = self.get_pdata(pqhtml)

        # Off shelf: no "buy now" button on the page.
        if len(area('.p-buy #addCart .buynow')) == 0:
            return off_shelf()

        # There should be exactly one product on the page.
        if len(pdata['prods']) != 1:
            raise ValueError('yintai product data length great than 1')

        product = pdata['prods'][0]
        detail = dict()

        # Brand
        detail['brand'] = area('h4.y-pro-cooper-name').text()

        # Name
        detail['name'] = product['name']

        # Currency
        currency = 'CNY'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        price = product['price']

        if u'直降' in area('#Y_ProBen').text():
            # The marked-down price is fetched with a second, AJAX-style POST
            # to the same URL.
            self.session.headers['Referer'] = url
            self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
            self.session.headers['Origin'] = url

            subArea = PyQuery(
                requests.post(url,
                              data=dict(),
                              headers=self.session.headers,
                              cookies=resp.cookies).text)

            price = re.search(
                r'(\d[\d\.]*)',
                subArea('.marketPriceNum .yt-num').text()).groups()[0]
            price = price + subArea('.marketPriceNum .yt-num em').text()

        # Price; this business logic is slated for removal later.
        detail['price'] = float(price)
        detail['listPrice'] = product['mPrice']

        # Colour: prefer the plain link, else the selected swatch
        # (fallback added 2016-12-15).
        swatches = area('.productInfo .s-s-color').next()
        color = swatches('a[href="Javascript:void(0);"]').text()
        color = color or swatches('.selected a').text()
        detail['color'] = color
        detail['colorId'] = product['colorID']

        # Images
        imgs = self.get_imgs(area)
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Product ID
        detail['productId'] = product['sku']

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('.yp-con-desc').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # Start the next request with fresh cookies and headers.
        self.session.cookies = RequestsCookieJar()
        self.session.headers = tool.get_one_header()

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #5
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Most fields are copied from ``pdata['product']`` (the structure
        returned by ``self.get_pdata``); images, sizes and the tab texts are
        read from the HTML.

        Args:
            url: Product page URL, requested through ``self.session``.

        Returns:
            The value of ``tool.return_data``: ``successful=False`` with an
            off-shelf or error payload when the response is a 404, any other
            non-200 status, or the product area is missing / out of stock;
            otherwise ``successful=True`` with the assembled ``detail`` dict.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log entry shared by every early-exit path.
            self.logger.info(json.dumps(dict(
                time=time.time(), title=pqhtml('title').text(), url=url)))

        def off_shelf():
            # Log the page, then build the off-shelf response.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('.product-detail')
        detail_tab = pqhtml('#product-detail-tabs')
        img_tab = pqhtml('div.images')

        pdata = self.get_pdata(pqhtml)

        # Off shelf: no product area, or an explicit out-of-stock marker.
        if not area or 'out of stock' in area('.out-of-stock').text():
            return off_shelf()

        video_prefix = 'http://image1.superdry.com/static/images/products/'

        product = pdata['product']
        detail = dict()

        # Total stock of this product.
        detail['stock'] = product['stock']

        detail['video'] = video_prefix + product['video']

        detail['gender'] = product['gender']

        detail['season'] = product['season']

        detail['category'] = product['category']

        detail['productSku'] = product['sku_code']

        detail['size_guide'] = product['size_guide']

        detail['subcategory'] = product['subcategory']

        detail['productCode'] = product['sku_code']

        # Product ID
        detail['productId'] = product['id']

        # Brand
        detail['brand'] = 'SUPERDRY'

        # Name
        detail['name'] = product['name']

        # Currency
        currency = product['currency']
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        detail['price'] = product['unit_sale_price']
        detail['listPrice'] = product['unit_price']

        # Description
        detail['descr'] = product['description']

        # Details
        detail['detail'] = detail_tab.text()

        # Returns policy
        # NOTE(review): 'tab-page' is an element selector here; if the tabs
        # are marked with a class this probably wants '.tab-page:last' --
        # confirm against the page markup.
        detail['returns'] = detail_tab('tab-page:last').text()

        # Colour
        detail['color'] = product['color']
        detail['colorId'] = product['color']

        # Images: swap thumbnail URLs for their zoom-size counterparts.
        imgs = [
            thumb.attr('src').replace('/productthumbs/', '/zoom/')
            for thumb in img_tab('.scroller li img').items()
        ]
        detail['img'] = img_tab('.scroller li img:first').attr(
            'src').replace('/productthumbs/', '/zoom/')
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # IP and port the response came from, when available.
        # NOTE(review): ``peer`` is not a standard attribute of the raw
        # response; presumably it is patched in elsewhere -- confirm.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        self.logger.info(json.dumps(dict(time=time.time(),
                                         productId=detail['productId'],
                                         name=detail['name'],
                                         currency=detail['currency'],
                                         price=detail['price'],
                                         listPrice=detail['listPrice'],
                                         url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #6
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Args:
            url: Product page URL, requested through ``self.session``.

        Returns:
            The value of ``tool.return_data``: ``successful=False`` with an
            off-shelf payload (404, or ``self.get_pdata`` returned nothing)
            or an error payload (other non-200 status); otherwise
            ``successful=True`` with the assembled ``detail`` dict.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            self.logger.info(json.dumps(dict(
                time=time.time(), title=pqhtml('title').text(), url=url)))

            if status_code == 404:
                # Off shelf: the page no longer exists.
                failure = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=resp.url,
                                             html=pqhtml.outerHtml())
            else:
                # Any other status is reported as an error.
                failure = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=failure)

        # Preparation: product data is parsed out of the page's scripts.
        script_text = pqhtml('script').text()
        pdata = self.get_pdata(script_text)
        area = pqhtml('#detail-display-wrapper')

        # Off shelf: no product data found (this path is not logged).
        if not pdata:
            failure = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=failure)

        detail = dict()

        # Name
        detail['name'] = pqhtml('h2.detail-title').text()

        # Brand
        detail['brand'] = self.get_brand(area)

        # Currency
        currency = pqhtml(
            'meta[itemprop="priceCurrency"]:first').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Colour
        detail['color'] = pqhtml(
            'ul.detail-additional-info:first>li:last').text()

        # Price (thousands separators removed)
        info = pdata['Products']['Info']
        detail['price'] = info['BasePrice'].replace(',', '')
        # NOTE(review): the old price from pdata is computed but never used;
        # the list price below is taken from the HTML instead -- confirm
        # which source is intended.
        unused_old_price = info['OldPrice'].replace(',', '')
        shown_price = (
            pqhtml('span.strokeText>span.price').text()
            or pqhtml('div#detail-display-info-wrapper span.price').text()
        )
        # Drop the first character of the displayed price.
        detail['listPrice'] = shown_price[1:]

        # Images
        zoom_urls = [
            item.attr('data-zoom')
            for item in
            pqhtml('div#detail-display-icon ul').children('li').items()
        ]
        detail['img'] = zoom_urls[0]
        detail['imgs'] = zoom_urls

        # Sizes
        detail['sizes'] = self.get_sizes(pdata, area)

        # Description
        detail['descr'] = area('p.detail-description:first').text()

        # Product ID (also used as the colour ID)
        parent_id = pdata['Products']['Info']['ParentProductId']
        detail['productId'] = parent_id
        detail['colorId'] = parent_id

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Returned URL.
        # NOTE(review): this is the requested URL, not ``resp.url`` as used
        # in the failure payloads above -- confirm that is intended.
        detail['backUrl'] = url

        self.logger.info(json.dumps(dict(time=time.time(),
                                         productId=detail['productId'],
                                         name=detail['name'],
                                         currency=detail['currency'],
                                         price=detail['price'],
                                         listPrice=detail['listPrice'],
                                         url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #7
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Also stores the two-letter code captured from ``/en-XX/`` in the URL
        on ``self.link_area``.

        Args:
            url: Product page URL, requested through ``self.session``. It
                must contain a ``/en-XX/`` segment.

        Returns:
            The value of ``tool.return_data``: ``successful=False`` with an
            off-shelf payload (404, ``checkSoldOut`` is truthy, ``get_sizes``
            returned ``None``, or the request hit too many redirects) or an
            error payload (other non-200 status); otherwise
            ``successful=True`` with the assembled ``detail`` dict.
        """
        try:
            resp = self.session.get(url, verify=False)

            status_code = resp.status_code
            pqhtml = PyQuery(resp.text or 'nothing')

            def log_page():
                # Log entry shared by every early-exit path.
                self.logger.info(json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url)))

            def off_shelf():
                # Log the page, then build the off-shelf response.
                log_page()
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
                return tool.return_data(successful=False, data=data)

            # Off shelf: the page no longer exists.
            if status_code == 404:
                return off_shelf()

            # Any other non-200 status is reported as an error.
            if status_code != 200:
                log_page()
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
                return tool.return_data(successful=False, data=data)

            area = pqhtml('#content')

            self.link_area = re.search(r'/en-(\w{2})/', url).group(1)

            if self.checkSoldOut(pqhtml):
                return off_shelf()

            pdata = self.get_pdata(area)

            detail = dict()

            # Brand
            brand = pdata['brand']['name']
            detail['brand'] = brand

            # Name
            detail['name'] = brand + ' ' + pdata['name']

            # Currency
            currency = pdata['price']['currency']
            detail['currency'] = currency
            detail['currencySymbol'] = tool.get_unit(currency)

            # Price
            price, listPrice = self.get_all_price(pdata)
            detail['price'] = price
            detail['listPrice'] = listPrice

            # Images
            imgs = self.get_imgs(area)
            detail['img'] = imgs[0]
            detail['imgs'] = imgs

            # Sizes; None means nothing is purchasable.
            sizes = self.get_sizes(pdata)
            if sizes is None:
                return off_shelf()

            # Normalise a lone "one size" entry to the configured label.
            if (len(sizes) == 1
                    and sizes[0]['name'].lower() in ('one size', 'onesize')):
                sizes[0]['name'] = self.cfg.DEFAULT_ONE_SIZE

            detail['sizes'] = sizes

            # Video
            if 'videos' in pdata and pdata['videos']:
                detail['video'] = self.get_video(pdata)

            accordion = 'section.product-accordion--desktop>section'

            # Product notes
            detail['note'] = area(accordion + ':first').text()

            # Size and fit
            detail['sizeFit'] = area(accordion + ':eq(1)').text()

            # Product details
            detail['detail'] = area(accordion + ':eq(2)').text()

            # Delivery and returns.
            # NOTE(review): both fields read the same last accordion
            # section -- confirm they really share one section.
            last_section = area(accordion + ':last').text()
            detail['delivery'] = last_section
            detail['returns'] = last_section

            # Description
            detail['descr'] = self.get_descr(area)

            # Product ID
            detail['productId'] = pdata['id']

            # Colour name and colour ID
            colour_info = pdata['colourInfo']
            if colour_info:
                detail['color'] = colour_info[0]['colourName']
                detail['colorId'] = (colour_info[0]['colourId']
                                     or self.cfg.DEFAULT_COLOR_SKU)
            else:
                detail['color'] = self.cfg.DEFAULT_ONE_COLOR
                detail['colorId'] = pdata['id']

            # HTTP status code
            detail['status_code'] = status_code

            # Status
            detail['status'] = self.cfg.STATUS_SALE

            # Final URL of the response
            detail['backUrl'] = resp.url

            self.logger.info(json.dumps(
                dict(time=time.time(),
                     productId=detail['productId'],
                     name=detail['name'],
                     currency=detail['currency'],
                     price=detail['price'],
                     listPrice=detail['listPrice'],
                     url=url)))

            return tool.return_data(successful=True, data=detail)

        except TooManyRedirects as e:
            # A redirect loop is reported as an off-shelf product; there is
            # no response, so the code is 0 and the error text stands in for
            # the HTML. Every other exception propagates unchanged.
            self.logger.exception(e)

            data = tool.get_off_shelf(code=0,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=url,
                                      html=str(e))

            return tool.return_data(successful=False, data=data)
Пример #8
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` and parses the body with
        PyQuery.  A 404 is reported as off-shelf and any other non-200 status
        as an error.  Otherwise the product fields are read from the page and
        from the structure returned by ``self.get_pdata``.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: nothing is caught here; any exception from the request or
            from parsing propagates (e.g. ``IndexError`` when the image list
            is empty).
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by the placeholder text 'nothing'.
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Both failure kinds log the same page summary first.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            if status_code == 404:
                # Off-shelf
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other error
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('.product-single-section-main')
        pdata = self.get_pdata(pqhtml('head'))

        detail = dict()

        # Brand
        product = pdata['product']
        detail['brand'] = product['vendor']

        # Name
        detail['name'] = area('h1[itemprop="name"]').text()

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Color: a single default color, keyed by the product id.
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = product['id']

        # Images: the src values are prefixed with 'http:'.
        imgs = [
            'http:' + img.attr('src')
            for img in area('.super-slider-main img').items()
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Product ID
        detail['productId'] = product['id']

        # Sizes
        detail['sizes'] = self.get_sizes(pdata, area)

        # Description
        detail['descr'] = area('.product-single-details-dropdown').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #9
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Stores the URL's domain on ``self.domain``, requests ``url`` through
        ``self.session`` and parses the body with PyQuery.  A 404, a missing
        product container or a disabled add-to-basket button is reported as
        off-shelf; any other non-200 status is reported as an error.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: nothing is caught here; any exception from the request or
            from parsing propagates.
        """
        self.domain = tool.get_domain(url)

        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by the placeholder text 'nothing'.
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Both failure kinds log the same page summary first.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            if status_code == 404:
                # Off-shelf
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other error
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#content>#productContainer')
        # NOTE(review): the result is not used below; the call is kept in
        # case get_pdata has side effects -- confirm and drop if it has none.
        self.get_pdata(pqhtml)

        # Off-shelf: no product container, or the basket button is disabled.
        if not area or area('.productButtons #disabledAddtobasket'):

            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))

            self.logger.info(log_info)

            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        detail = dict()

        # Brand
        detail['brand'] = 'COS'

        # Name
        detail['name'] = area('.productInfo h1:first').text()

        # Currency
        currency = pqhtml('meta[property="og:price:currency"]').attr(
            'content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(pqhtml, area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Color: each color key maps to itself as its id.
        color = self.get_color(area)
        detail['color'] = color
        detail['colorId'] = {key: key for key in color.keys()}

        # Images: either a flat list, or a mapping of color id to a list.
        imgs = self.get_imgs(area)
        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = {cid: arr[0] for cid, arr in imgs.items()}
        detail['imgs'] = imgs

        # Keys
        detail['keys'] = list(color.keys())

        # Product ID
        detail['productId'] = area(
            'input[data-product-identifier!=""]').attr(
                'data-product-identifier')

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('.productInfo>.infowrap>dl>dd:first').text()

        # Returns policy
        # NOTE(review): same selector as 'descr' above -- confirm whether a
        # different node was intended.
        detail['returns'] = area(
            '.productInfo>.infowrap>dl>dd:first').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL (the requested URL, not resp.url)
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #10
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` and parses the body with
        PyQuery.  A 404, or a stock-status text containing the out-of-stock
        marker, is reported as off-shelf; any other non-200 status is
        reported as an error.  The product is described with a single default
        color and a single default size.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: nothing is caught here; any exception from the request or
            from parsing propagates.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by the placeholder text 'nothing'.
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Both failure kinds log the same page summary first.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            if status_code == 404:
                # Off-shelf
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other error
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('.product-detail-container')
        # Read once; used for the off-shelf check and the inventory below.
        stock_txt = area('#stock-status').text()

        # Off-shelf: the stock status carries the out-of-stock marker.
        if u'缺货' in stock_txt:

            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))

            self.logger.info(log_info)
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        detail = dict()

        # Brand
        detail['brand'] = (area('#brand:first span').text()
                           or area('#brand a').text())

        # Name
        detail['name'] = area('#name').text()

        # Currency
        currency = area('#price-currency').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Color
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images: try three sources in turn, keeping the first non-empty one.
        imgs = [
            a.attr('data-large-img') for a in area(
                '.image-container  .thumbnail-container img').items()
        ] or [
            img.attr('src')
            for img in area('#iherb-product-zoom img').items()
        ] or [
            area('#product-image .product-summary-image a').attr('href')
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Product ID
        productId = area('input[name="pid"]').attr('value')
        detail['productId'] = productId

        # Sizes: inventory is the last quantity option when in stock, else 0.
        if 'In Stock' in stock_txt or u'有库存' in stock_txt:
            inv = area('#ddlQty option:last').attr('value')
        else:
            inv = 0
        detail['sizes'] = [
            dict(name=self.cfg.DEFAULT_ONE_SIZE,
                 inventory=inv,
                 id=productId,
                 sku=productId)
        ]

        # Description
        detail['descr'] = area('#product-specs-list li').text()

        # Detail
        detail['detail'] = pqhtml('div[itemprop="description"]').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL (the requested URL, not resp.url)
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #11
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` and parses the body with
        PyQuery.  A 404 or a missing ``#pdp-page`` node is reported as
        off-shelf; any other non-200 status is reported as an error.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: nothing is caught here; any exception from the request or
            from parsing propagates.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by the placeholder text 'nothing'.
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Both failure kinds log the same page summary first.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            if status_code == 404:
                # Off-shelf
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other error
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#pdp-page')

        # Off-shelf: the product section is missing.
        if not area:

            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))

            self.logger.info(log_info)
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        detail = dict()

        # Product ID: the part of the style number before the first '_'.
        style_number = pqhtml('img[data-stylenumber!=""]').attr(
            'data-stylenumber')
        detail['productId'] = style_number.split('_')[0]

        # Brand
        detail['brand'] = 'Lululemon'

        # Name
        detail['name'] = area('h1.OneLinkNoTx').text()

        # Currency
        currency = pqhtml('input#currencyCode').attr('value').strip()
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Description
        detail['descr'] = self.get_descr(pqhtml, area)

        # Detail
        detail['detail'] = area('#fabric').text()

        # Returns policy
        detail['returns'] = ''

        colorDriver, colorCount = self.get_pdata(pqhtml)

        # Color: each color key maps to itself as its id.
        img, imgs, color = self.get_color(area, colorCount)
        detail['color'] = color
        detail['colorId'] = {key: key for key in color}

        # Images
        detail['img'] = img
        detail['imgs'] = imgs

        # Sizes
        # NOTE(review): this replaces the price set from get_all_price above
        # -- confirm that the override is intended.
        sizes, price = self.get_sizes(colorDriver)
        detail['sizes'] = sizes
        detail['price'] = price

        if isinstance(color, dict):
            detail['keys'] = list(color)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL
        detail['backUrl'] = resp.url

        # Returned IP and port
        # NOTE(review): relies on a non-public attribute chain of the
        # response object -- confirm where 'peer' is set.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #12
0
    def detail(self,url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` and parses the body with
        PyQuery.  A 404, or an availability text other than ``'Available'``,
        is reported as off-shelf; any other non-200 status is reported as an
        error.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: nothing is caught here; any exception from the request or
            from parsing propagates.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by the placeholder text 'nothing'.
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Both failure kinds log the same page summary first.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            if status_code == 404:
                # Off-shelf
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other error
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('.page-content')
        domain = tool.get_domain(url)

        # Off-shelf: the availability marker is anything but 'Available'.
        if area('div[itemprop="availability"]').text().strip() != 'Available':

            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))

            self.logger.info(log_info)

            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        detail = dict()

        # Brand
        detail['brand'] = 'Kit and Ace'

        # Name
        detail['name'] = area('h1[itemprop="name"]').text()

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Color: each color key maps to itself as its id.
        color = self.get_color(area)
        detail['color'] = color
        detail['colorId'] = {key: key for key in color.keys()}

        # Images: either a flat list, or a mapping of color id to a list.
        imgs = self.get_imgs(area, domain)
        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = {cid: arr[0] for cid, arr in imgs.items()}
        detail['imgs'] = imgs

        # Keys
        detail['keys'] = list(color.keys())

        # Product ID
        detail['productId'] = area('.js-pdp-product-code').attr(
            'data-product-id')

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('.pdp-desc__description').text()

        # Fabric
        detail['fabric'] = area('.pdp-info-components').text()

        # Detail
        detail['detail'] = area('.productDetailsPageSection1').text()

        # Returns policy
        # NOTE(review): this selector does not resemble the other selectors
        # used for this page -- confirm it matches a node here.
        detail['returns'] = area('.productInfo>.infowrap>dl>dd:first').text()

        # Model information
        detail['model'] = self.get_model(area, color.keys())

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL (the requested URL, not resp.url)
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #13
0
    def detail(self,url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` and parses the body with
        PyQuery.  A 404 is reported as off-shelf and any other non-200 status
        as an error.  When the page carries a ``<meta http-equiv="refresh">``
        tag, the method sleeps for half the announced delay, sets the
        ``INSTART_SESSION_ID`` cookie and requests the page a second time.
        A missing product form or a ``.cannotorder`` node is reported as
        off-shelf.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: every exception is logged together with the parsed page and
            then re-raised.
        """
        # Bound before the try so the except handler can always format it.
        pqhtml = ''
        try:
            resp = self.session.get(url, verify=False)

            status_code = resp.status_code
            # An empty body is replaced by the placeholder text 'nothing'.
            pqhtml = PyQuery(resp.text or 'nothing')

            if status_code != 200:
                # Both failure kinds log the same page summary first.
                log_info = json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url))
                self.logger.info(log_info)

                if status_code == 404:
                    # Off-shelf
                    data = tool.get_off_shelf(code=status_code,
                                              message=self.cfg.SOLD_OUT,
                                              backUrl=resp.url,
                                              html=pqhtml.outerHtml())
                else:
                    # Any other error
                    data = tool.get_error(code=status_code,
                                          message=self.cfg.GET_ERR.get(
                                              'SCERR', 'ERROR'),
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())

                return tool.return_data(successful=False, data=data)

            # Preparation
            area = pqhtml('form[name="productPage"]')

            need_refresh_node = pqhtml('meta[http-equiv="refresh"]')
            if len(need_refresh_node):
                # The content attribute starts with the delay, before ';'.
                time_limit = need_refresh_node.attr(
                    'content').strip().split(';')[0]

                sleep_seconds = int(time_limit)/2
                time.sleep(sleep_seconds)

                # Cookie value: millisecond timestamp of the moment the
                # sleep started (now minus the time slept).
                self.session.cookies.set(
                    'INSTART_SESSION_ID',
                    str(int((time.time()-sleep_seconds)*1000)))

                resp = self.session.get(url, verify=False)

                # Same empty-body fallback as for the first request.
                # NOTE(review): status_code still holds the status of the
                # first response -- confirm whether it should be refreshed.
                pqhtml = PyQuery(resp.text or 'nothing')

                area = pqhtml('form[name="productPage"]')

            # Off-shelf: no product form, or the page says it cannot be
            # ordered.
            if not area or len(area('.cannotorder')):

                log_info = json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url))

                self.logger.info(log_info)
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())

                return tool.return_data(successful=False, data=data)

            productId = area(
                'input[name$="productId"][value!=""]').attr('value')
            pdata = self.get_pdata(area, productId)

            detail = dict()

            # Product ID
            detail['productId'] = productId

            # Brand
            detail['brand'] = area('input.cmDesignerName').attr('value')

            # Name
            detail['name'] = area('h1.product-name:first').text()

            # Currency
            currency = pqhtml(
                'meta[itemprop="priceCurrency"]').attr('content')
            detail['currency'] = currency
            detail['currencySymbol'] = tool.get_unit(currency)

            # Price
            price, listPrice = self.get_all_price(area)
            detail['price'] = price
            detail['listPrice'] = listPrice

            # Description
            detail['descr'] = area('div[itemprop="description"]').text()

            # Detail
            detail['detail'] = area('.product-details-info').text()

            # Images
            img, imgs = self.get_imgs(area)
            detail['img'] = img
            detail['imgs'] = imgs

            # Sizes
            sizes = self.get_sizes(pdata)
            detail['sizes'] = sizes

            # When sizes is a dict, its keys double as the color names and
            # color ids.
            if isinstance(sizes, dict):
                detail['keys'] = list(sizes.keys())
                detail['color'] = {key: key for key in sizes}
                detail['colorId'] = {key: key for key in sizes}

            # HTTP status code
            detail['status_code'] = status_code

            # Status
            detail['status'] = self.cfg.STATUS_SALE

            # Return URL
            detail['backUrl'] = resp.url

            # Returned IP and port
            # NOTE(review): relies on a non-public attribute chain of the
            # response object -- confirm where 'peer' is set.
            peer = resp.raw._original_response.peer
            if peer:
                detail['ip_port'] = ':'.join(map(str, peer))

            log_info = json.dumps(
                dict(time=time.time(),
                     productId=detail['productId'],
                     name=detail['name'],
                     currency=detail['currency'],
                     price=detail['price'],
                     listPrice=detail['listPrice'],
                     url=url))

            self.logger.info(log_info)

            return tool.return_data(successful=True, data=detail)

        except Exception:
            # Record the page that was being processed, then let the
            # original exception propagate.
            self.logger.exception('html:{0}'.format(pqhtml))
            raise
Пример #14
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` and parses the body with
        PyQuery.  A 404 is reported as off-shelf and any other non-200 status
        as an error.  Otherwise the product fields are read from the page;
        the product gets a single default color.

        :param url: product page URL.
        :return: the value of ``tool.return_data`` -- ``successful=False``
            with an off-shelf/error payload, or ``successful=True`` with the
            ``detail`` dict.
        :raises: nothing is caught here; any exception from the request or
            from parsing propagates.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by the placeholder text 'nothing'.
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Both failure kinds log the same page summary first.
            summary = dict(time=time.time(),
                           title=pqhtml('title').text(),
                           url=url)
            self.logger.info(json.dumps(summary))

            if status_code == 404:
                # Off-shelf
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other error
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('.product-detail-information')

        detail = dict()

        # Brand: first word of the brand image's alt text.
        brand_alt = pqhtml('.product-brand img:first').attr('alt')
        detail['brand'] = brand_alt.split()[0]

        # Name: prefer the J_title_name node, fall back to title-name.
        name = area('.J_title_name').text()
        if not name:
            name = area('.title-name').text()
        detail['name'] = name

        # Currency: first word of the currency selector's text.
        currency = pqhtml('a#select_currency').text().split()[0]
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        detail['price'], detail['listPrice'] = self.get_all_price(area)

        # Color
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images
        links = pqhtml('.product-detail-preview .toolbar>li>a').items()
        imgs = [link.attr('href') for link in links]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Product ID
        detail['productId'] = area('.product-detail-selection-sku').text()

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('#product-description-tab').text()

        # Detail
        # NOTE(review): same selector as 'descr' above -- confirm whether a
        # different node was intended.
        detail['detail'] = area('#product-description-tab').text()

        # Returns policy
        detail['returns'] = area('.product-directions').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL (the requested URL, not resp.url)
        detail['backUrl'] = url

        summary = dict(time=time.time(),
                       productId=detail['productId'],
                       name=detail['name'],
                       currency=detail['currency'],
                       price=detail['price'],
                       listPrice=detail['listPrice'],
                       url=url)
        self.logger.info(json.dumps(summary))

        return tool.return_data(successful=True, data=detail)
Пример #15
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        Args:
            url: address of the product page to request.

        Returns:
            The value produced by ``tool.return_data``: a failure payload
            when the response status is not 200, otherwise a success payload
            wrapping the collected ``detail`` dict.
        """
        response = self.session.get(url, verify=False)
        status_code = response.status_code
        pqhtml = PyQuery(response.text or 'nothing')

        if status_code != 200:
            # Every non-200 response is logged the same way; only the
            # payload differs between 404 (off the shelf) and other errors.
            self.logger.info(
                json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url)))

            if status_code == 404:
                failure = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=response.url,
                                             html=pqhtml.outerHtml())
            else:
                failure = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=response.url,
                                         html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=failure)

        # Preparation
        self.domain = tool.get_domain(url)

        detail = {}

        # Brand, with a fixed fallback when the page yields an empty one
        detail['brand'] = self.getBrandByHtml(pqhtml).strip() or 'MYGEEK'

        # Name
        detail['name'] = pqhtml('span.title strong').text().strip()

        # Currency (hard-coded for this source)
        detail['currency'] = 'CNY'
        detail['currencySymbol'] = tool.get_unit('CNY')

        # Price
        detail['price'], detail['listPrice'] = self.getPriceByHtml(pqhtml)

        # Color; the SKU keys are only recorded when colors come as a dict
        palette = self.get_color(pqhtml)
        detail['color'] = palette
        if isinstance(palette, dict):
            detail['colorId'] = dict((key, key) for key in palette.keys())
            detail['keys'] = palette.keys()
        else:
            detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Image set; the first image doubles as the cover image
        gallery = self.getImgsByHtml(pqhtml)
        detail['img'] = gallery[0]
        detail['imgs'] = gallery

        # Product ID, taken from the `id=` parameter of the form action
        form_action = pqhtml('div.pid5 form:first').attr('action')
        detail['productId'] = re.search(r'id=(\d*)', form_action).group(1)

        # Sizes
        detail['sizes'] = self.getSizesByHtml(pqhtml)

        # Description: two page sections with titles and scripts removed
        upper_text = pqhtml('#pid1_2').remove('.title').remove(
            'script').text()
        lower_text = pqhtml('.pid2').remove('.title').remove('script').text()
        detail['descr'] = upper_text + lower_text

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (final URL of the response)
        detail['backUrl'] = response.url

        self.logger.info(
            json.dumps(
                dict(time=time.time(),
                     productId=detail['productId'],
                     name=detail['name'],
                     currency=detail['currency'],
                     price=detail['price'],
                     listPrice=detail['listPrice'],
                     url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #16
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        Args:
            url: address of the product page to request. Its fourth
                ``/``-separated segment is passed to ``get_imgs``.

        Returns:
            The value produced by ``tool.return_data``: a failure payload
            when the status is not 200 or ``get_pdata`` returns something
            falsy, otherwise a success payload wrapping the ``detail`` dict.
        """
        response = self.session.get(url, verify=False)
        status_code = response.status_code
        pqhtml = PyQuery(response.text or 'nothing')

        def log_page_title():
            # Record the time, the page title and the requested URL.
            self.logger.info(
                json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url)))

        def off_shelf_result():
            # Log, then wrap an off-the-shelf payload as a failure.
            log_page_title()
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=response.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Off the shelf
        if status_code == 404:
            return off_shelf_result()

        # Other errors
        if status_code != 200:
            log_page_title()
            payload = tool.get_error(code=status_code,
                                     message=self.cfg.GET_ERR.get(
                                         'SCERR', 'ERROR'),
                                     backUrl=response.url,
                                     html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Preparation
        pdata = self.get_pdata(pqhtml)
        summary = pqhtml('.productDetailSummary')
        info = pqhtml('#productInfo')
        img_path = url.split('/')[3]

        # Off the shelf: no product data could be extracted
        if not pdata:
            return off_shelf_result()

        detail = {}

        # Name
        detail['name'] = pqhtml('.productName').text()

        # Brand
        detail['brand'] = pqhtml('.productName a').text()

        # Currency
        currency_code = summary('span[itemprop="priceCurrency"]').text()
        detail['currency'] = currency_code
        detail['currencySymbol'] = tool.get_unit(currency_code)

        # Price
        detail['price'], detail['listPrice'] = self.get_all_price(summary)

        # Image set
        detail['img'], detail['imgs'] = self.get_imgs(summary, img_path)

        # Video, only when the page carries one
        if len(summary('.MagicScroll .productVideo')) > 0:
            video_link = summary('.MagicScroll a.productVideo')
            detail['video'] = video_link.attr('data-video-url')

        # Colors and sizes
        colors, sizes = self.get_colors_sizes(summary, pdata)
        detail['color'] = colors
        detail['sizes'] = sizes
        detail['keys'] = colors.keys()
        detail['colorId'] = {key: key for key in colors.keys()}

        # Product ID
        detail['productId'] = summary('input#baseNo').attr('value')

        # Description
        detail['descr'] = info('#overview').text()

        # Details
        detail['detail'] = info('#specs').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (the requested URL)
        detail['backUrl'] = url

        self.logger.info(
            json.dumps(
                dict(time=time.time(),
                     productId=detail['productId'],
                     name=detail['name'],
                     currency=detail['currency'],
                     price=detail['price'],
                     listPrice=detail['listPrice'],
                     url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #17
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        Args:
            url: address of the product page to request.

        Returns:
            The value produced by ``tool.return_data``: a failure payload
            when the response status is not 200, otherwise a success payload
            wrapping the collected ``detail`` dict.

        Raises:
            ValueError: if ``get_price_info`` reports a currency symbol
                other than ``'$'``.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Every non-200 response is logged the same way; only the
            # payload differs between 404 (off the shelf) and other errors.
            log_info = json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url))
            self.logger.info(log_info)

            if status_code == 404:
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#theater')
        pdata = self.get_pdata(pqhtml('script:gt(20)'))

        detail = dict()

        # Brand
        detail['brand'] = area('.brand').text()

        # Name
        detail['name'] = area('h1:first').text()

        currencySymbol, price, listPrice = self.get_price_info(pdata)

        # Only dollar-denominated pages are accepted.
        if currencySymbol != '$':
            raise ValueError('currencySymbol is not USD')

        # Currency
        currency = 'USD'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Color
        color = self.get_color(pdata)
        detail['color'] = color
        detail['colorId'] = {cid: cid for cid in color.keys()}

        # Image set
        img, imgs = self.get_imgs(pdata)
        detail['img'] = img
        detail['imgs'] = imgs

        # Product ID
        detail['productId'] = pqhtml('input[name="productId"]').attr('value')

        # Sizes
        sizes = self.get_sizes(pdata)
        detail['sizes'] = sizes

        # Description
        detail['descr'] = area('.description').text()

        # Keys: only ids that have both an image entry and a size entry
        detail['keys'] = set(img.keys()) & set(sizes.keys())

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (final URL of the response)
        detail['backUrl'] = resp.url

        # IP and port of the responding peer. `peer` is not a standard
        # attribute of the underlying response object (presumably added by a
        # project-level patch -- TODO confirm), so this optional field is
        # skipped when the attribute is missing instead of failing the
        # whole scrape.
        peer = getattr(getattr(resp.raw, '_original_response', None),
                       'peer', None)
        if peer:
            detail['ip_port'] = ':'.join(str(part) for part in peer)

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #18
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record from ``pdata``.

        Args:
            url: address of the product page to request.

        Returns:
            The value produced by ``tool.return_data``. A failure payload is
            returned when the status is not 200, when the page contains an
            ``.error_message`` element, or when ``pdata`` has the sold-out
            or international-shipping-restriction message enabled; otherwise
            a success payload wrapping the collected ``detail`` dict.
        """
        response = self.session.get(url, verify=False)
        status_code = response.status_code
        pqhtml = PyQuery(response.text or 'nothing')

        def log_page_title():
            # Record the time, the page title and the requested URL.
            self.logger.info(
                json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url)))

        def off_shelf_result():
            # Log, then wrap an off-the-shelf payload as a failure.
            log_page_title()
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=response.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Off the shelf
        if status_code == 404:
            return off_shelf_result()

        # Other errors
        if status_code != 200:
            log_page_title()
            payload = tool.get_error(
                code=status_code,
                message='status_code:{0},need 200, message:{1}'.format(
                    status_code, self.cfg.GET_ERR.get('SCERR', 'ERROR')),
                backUrl=response.url,
                html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Error reported inside an otherwise successful page
        if len(pqhtml('.error_message')) >= 1:
            log_page_title()
            payload = tool.get_error(code=status_code,
                                     message=self.cfg.GET_ERR.get(
                                         'SAKERR', 'ERROR'),
                                     backUrl=response.url,
                                     html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Preparation
        area = pqhtml('#pdp-content-area')
        pdata = self.get_pdata(area)

        # Off the shelf: sold out, or not shippable internationally
        if (pdata['sold_out_message']['enabled']
                or pdata['intl_shipping_restriction']['enabled']):
            return off_shelf_result()

        detail = {}

        # Brand
        if pdata['brand_name']['enabled']:
            detail['brand'] = pdata['brand_name']['label']
        else:
            detail['brand'] = ''

        # Name
        detail['name'] = pdata['short_description']

        # Currency
        currency_code = pdata['price']['list_price']['local_currency_code']
        detail['currency'] = currency_code
        detail['currencySymbol'] = tool.get_unit(currency_code)

        # Price (replaced further down by the values from get_sizes)
        detail['price'], detail['listPrice'] = self.get_all_price(pdata)

        # Color: the color id must be used as the key here, even though an
        # id of 0 is a known pitfall, because the prices below are
        # distinguished per color by that id.
        color_names = {}
        color_ids = {}
        for entry in pdata['colors']['colors']:
            cid = str(entry['id'])
            color_names[cid] = entry['label']
            color_ids[cid] = cid
        detail['color'] = color_names or self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = color_ids or self.cfg.DEFAULT_COLOR_SKU

        # Image set: a flat list, or a mapping of color id to image list
        gallery = self.get_imgs(pdata, area)
        if isinstance(gallery, list):
            detail['img'] = gallery[0]
        else:
            detail['img'] = {
                cid: pictures[0]
                for cid, pictures in gallery.items()
            }
        detail['imgs'] = gallery

        # Sizes, including the per-color price and listPrice
        size_price, size_list_price, sizes = self.get_sizes(pdata)

        # Keys: prefer the size keys, fall back to the color keys
        size_keys = sizes.keys()
        if size_keys:
            detail['keys'] = size_keys
        elif color_names:
            detail['keys'] = color_names.keys()

        # Product ID
        detail['productId'] = pdata['product_code']

        detail['sizes'] = sizes
        detail['price'] = size_price
        detail['listPrice'] = size_list_price

        # Description
        detail['descr'] = PyQuery(pdata['description']).text()

        # Returns and exchanges
        detail['returns'] = pdata['simple_shipping_statement']['message']

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (the requested URL)
        detail['backUrl'] = url

        self.logger.info(
            json.dumps(
                dict(time=time.time(),
                     productId=detail['productId'],
                     name=detail['name'],
                     currency=detail['currency'],
                     price=detail['price'],
                     listPrice=detail['listPrice'],
                     url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #19
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        Most fields are read from the page's inline scripts: the
        ``tc_vars["..."]`` assignments and the ``colorSizeJson`` object.

        Args:
            url: address of the product page to request.

        Returns:
            The value produced by ``tool.return_data``: a failure payload
            when the status is not 200 or the page has no ``#itemContent``
            element, otherwise a success payload wrapping the ``detail``
            dict.

        Raises:
            AttributeError: if an expected script assignment is not found
                (the regex search returns ``None``).
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page_title():
            # Record the time, the page title and the requested URL.
            self.logger.info(
                json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url)))

        def off_shelf_result():
            # Log, then wrap an off-the-shelf payload as a failure.
            log_page_title()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return off_shelf_result()

        # Other errors
        if status_code != 200:
            log_page_title()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        Jtxt = pqhtml('script').text()
        area = pqhtml('#itemContent')

        # Off the shelf: the item container is missing
        if not area:
            return off_shelf_result()

        def tc_var(key):
            # Value of the `tc_vars["<key>"] = "<value>";` script assignment.
            pattern = r'tc_vars\["%s"\] =\s*"(.*?)";' % key
            return re.search(pattern, Jtxt, re.DOTALL).group(1)

        pdata = json.loads(
            re.search(r'jsInit.item.colorSizeJson =\s*(.*?\}\});\s*', Jtxt,
                      re.DOTALL).group(1))

        detail = dict()

        # Name
        detail['name'] = area('#itemTitle').text()

        # Brand: prefer the markup, fall back to the script variable
        brand = tc_var('product_brand')
        detail['brand'] = area(
            '#itemTitle span[itemprop="brand"]').text() or brand

        # Currency
        currency = tc_var('nav_currency')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        detail['price'] = tc_var('product_discountprice')
        detail['listPrice'] = tc_var('product_price')

        # Image set: a flat list, or a mapping of color id to image list
        imgsTmp = self.get_imgs(Jtxt, area, pdata)
        if isinstance(imgsTmp, dict):
            detail['img'] = {cid: imgs[0] for cid, imgs in imgsTmp.items()}
        else:
            detail['img'] = imgsTmp[0]
        detail['imgs'] = imgsTmp

        # Sizes
        detail['sizes'] = self.get_sizes(pdata)

        # Each color's `Cod10` value is used as product id, color id and key.
        codes = [color['Cod10'] for color in pdata['Colors']]

        # Product ID
        detail['productId'] = {code: code for code in codes}

        # Color
        detail['color'] = {
            color['Cod10']: color['Name']
            for color in pdata['Colors']
        }
        detail['colorId'] = {code: code for code in codes}

        # Description and fabric both come from the first info entry
        # (selectors last modified 2016-09-25).
        first_info = area('#item-infos li:first').remove('script').text()
        detail['descr'] = first_info
        detail['fabric'] = first_info

        # Returns and exchanges (selector last modified 2016-09-25)
        detail['returns'] = area('#item-infos li:last').remove(
            'script').text()

        # Designer
        detail['designer'] = tc_var('product_author')

        # Keys
        detail['keys'] = list(codes)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (the requested URL)
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #20
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        The first response is passed through ``resp_verify``; the page is
        requested again when it asks for a reload, and the whole method is
        retried when the response body is empty.

        Args:
            url: address of the product page to request.

        Returns:
            The value produced by ``tool.return_data``: a failure payload
            when the status is not 200 or the availability text contains
            ``'Out of stock'``, otherwise a success payload wrapping the
            collected ``detail`` dict.
        """
        # First load
        resp = self.session.get(url, verify=False)

        # Verify the response (anti-crawler check)
        resp = self.resp_verify(resp)

        if 'window.location.reload(true);' in resp.text:
            # Second load
            resp = self.session.get(url, verify=False)

        # Sometimes no content is returned at all: start over.
        # NOTE(review): this retry is unbounded; a source that keeps
        # answering with an empty body will recurse until the interpreter's
        # recursion limit is hit -- consider capping the attempts.
        if not resp.text:
            return self.detail(url)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page_title():
            # Record the time, the page title and the requested URL.
            self.logger.info(
                json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url)))

        def off_shelf_result():
            # Log, then wrap an off-the-shelf payload as a failure.
            log_page_title()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return off_shelf_result()

        # Other errors
        if status_code != 200:
            log_page_title()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page itself reports no stock
        if 'Out of stock' in pqhtml('.product-availability').text():
            return off_shelf_result()

        # Preparation
        area = pqhtml('div#product-view')
        pdata = self.get_pdata(area)

        detail = dict()

        # Brand: the part of the heading before the first '-'
        heading = area('.panel-a h1:first').text()
        detail['brand'] = heading.split('-')[0].strip()

        # Name
        detail['name'] = heading

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Product ID
        productId = pdata['productId']
        detail['productId'] = productId

        # Price, with thousands separators stripped
        detail['price'] = pdata['basePrice'].replace(',', '')
        detail['listPrice'] = pdata['oldPrice'].replace(',', '')

        # Color
        detail['color'] = area('button#product-addtocart-button').attr(
            'data-variant') or self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = productId

        # Image set; the first image doubles as the cover image
        imgs = [
            img.attr('data-src')
            for img in area('div#mobile-carousel-images a>img').items()
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = self.get_sizes(area, pdata)

        # Description
        detail['descr'] = area('div.tog-desc').text() + area.parent()(
            '.description-section:first').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (final URL of the response)
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #21
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        Args:
            url: address of the product page to request.

        Returns:
            The value produced by ``tool.return_data``: a failure payload
            when the response status is not 200, otherwise a success payload
            wrapping the collected ``detail`` dict.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        if status_code != 200:
            # Every non-200 response is logged the same way; only the
            # payload differs between 404 (off the shelf) and other errors.
            log_info = json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url))
            self.logger.info(log_info)

            if status_code == 404:
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#main #primary')

        detail = dict()

        # Product ID (also stored as SKU and product code)
        productId = area('[itemprop="productID"]:first').text().replace(
            '#', '')
        detail['productId'] = productId
        detail['productSku'] = productId
        detail['productCode'] = productId

        # Brand
        brand = area('.brand-name:first').text()
        detail['brand'] = brand

        # Name: brand followed by the product name
        detail['name'] = ' '.join(
            [brand, area('.product-name:first').text()])

        # Price
        price, listPrice, currency = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Currency; this channel only uses EUR, USD and GBP.
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Description
        detail['descr'] = area(
            '#pdpMain .product-detail .product-information').text()

        # Color
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Image set; the first image doubles as the cover image
        imgs = [
            img.attr('src')
            for img in area('#pdpMain #product-col-2 img').items()
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL (final URL of the response)
        detail['backUrl'] = resp.url

        # IP and port of the responding peer. `peer` is not a standard
        # attribute of the underlying response object (presumably added by a
        # project-level patch -- TODO confirm), so this optional field is
        # skipped when the attribute is missing instead of failing the
        # whole scrape.
        peer = getattr(getattr(resp.raw, '_original_response', None),
                       'peer', None)
        if peer:
            detail['ip_port'] = ':'.join(str(part) for part in peer)

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #22
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        The page is requested through ``self.session``. A 404 response, or
        page data whose ``hasOrderableVariants`` entry is falsy, produces an
        off-shelf payload; any other non-200 status produces an error
        payload. Otherwise the product fields are collected into a dict.

        :param url: product page URL.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise.
        :raises AttributeError: when no currency code is found in the page
            scripts (``re.search`` returns ``None``).

        Any exception raised while scraping propagates to the caller
        unchanged (the former ``try/except: raise`` wrapper was a no-op).
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Short log line (timestamp, page title, url) for early exits.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        def off_shelf():
            # Log the page, then wrap an off-shelf payload as a failed result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page is gone.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#container')
        domain = tool.get_domain(url)
        pdata = self.get_pdata(pqhtml, domain)

        # Off shelf: nothing orderable according to the page data.
        if not pdata['hasOrderableVariants']:
            return off_shelf()

        detail = dict()
        meta = area('.product-meta')

        # Brand
        detail['brand'] = meta.attr('data-brand')

        # Name
        detail['name'] = meta.attr('data-productname')

        # Currency
        currency = re.search(r's\["currencyCode"\]="(\w{3})";',
                             pqhtml('script').text()).group(1)
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price and sizes
        price, sizes = self.get_info(pdata)
        detail['price'] = price

        # List price. The previous pattern (r'\d[\d\.]') had no capture
        # group, so ``.groups()[0]`` raised IndexError whenever the price
        # note had text. Take the first number in the note and fall back to
        # the current price when the note is empty or holds no number.
        # NOTE(review): assumes '.' is the decimal separator and there are
        # no thousands separators, as the original character class implied.
        ptxt = area('.pricenotebucket').text()
        matched = re.search(r'\d+(?:\.\d+)?', ptxt) if ptxt else None
        detail['listPrice'] = matched.group(0) if matched else price

        # Colors
        status, color, imgs = self.get_color(pdata)
        detail['color'] = color
        detail['colorId'] = dict((key, key) for key in color.keys())

        # Keys
        detail['keys'] = color.keys()

        # Images: a list gives one cover image; otherwise ``imgs`` is
        # treated as a mapping and the cover is the first image per key.
        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = dict(
                (cId, imgArr[0]) for cId, imgArr in imgs.items())
        detail['imgs'] = imgs

        # Product ID
        detail['productId'] = meta.attr('data-pid')

        # Sizes
        detail['sizes'] = sizes

        # Description
        detail['descr'] = area(
            'section.product-details .longdescription').text()

        # Details
        detail['detail'] = area('section.product-details').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = status

        # Final URL of the response
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #23
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response or a page without a ``#goodsInfo`` element produces
        an off-shelf payload; any other non-200 status produces an error
        payload. Otherwise the product fields are collected into a dict.

        :param url: product page URL.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise.

        Any exception raised while scraping propagates to the caller
        unchanged (the former ``try/except: raise`` wrapper was a no-op).
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Short log line (timestamp, page title, url) for early exits.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        def off_shelf():
            # Log the page, then wrap an off-shelf payload as a failed result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page is gone.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('#goodsInfo')

        # Off shelf: the product block is missing from the page.
        if not area:
            return off_shelf()

        detail = dict()

        # Product ID
        productId = pqhtml('#goodsForm input#bskGodGodNo').attr('value')
        detail['productId'] = productId
        detail['productSku'] = productId
        detail['productCode'] = area('.prd-code').text()

        # Brand
        brand = pqhtml('#goodsForm input#brndNm').attr('value')
        detail['brand'] = brand

        # Name
        detail['name'] = u'{0} {1}'.format(
            brand, pqhtml('#goodsForm input#godNm').attr('value'))

        # Currency and prices
        currency, price, listPrice = self.get_currency_prices(pqhtml, area)
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        detail['price'] = price
        detail['listPrice'] = listPrice

        # Description
        descr = pqhtml('meta[name="description"]').attr('content')
        detail['descr'] = descr

        # Details. ``attr`` yields None when the meta tag is absent, and
        # ``None + str`` raised TypeError; treat a missing description as
        # empty text for the concatenation only.
        detail['detail'] = (descr or '') + area('.desc-area').text()

        # Color
        # NOTE(review): the result of get_color was never used; the call is
        # kept in case it has side effects or validates the page -- confirm.
        self.get_color(area)
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images
        detail['img'] = pqhtml(
            'meta[property="og:image"][name="og_image"]').attr('content')
        detail['imgs'] = [
            img.attr('src')
            for img in pqhtml('#prdImgWrap .prdImg ul>li>img').items()
        ]

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # IP and port of the responding peer
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        log_info = json.dumps(dict(time=time.time(),
                                   productId=detail['productId'],
                                   name=detail['name'],
                                   currency=detail['currency'],
                                   price=detail['price'],
                                   listPrice=detail['listPrice'],
                                   url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #24
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        Checks run in this order: 404 -> off shelf; any other non-200
        status -> error; then page content (missing ``#buy`` element or
        missing ``body .right`` area) -> off shelf. Previously the ``#buy``
        test was combined with the 404 test, so a non-200 page without the
        button was reported as sold out instead of as an error.

        :param url: product page URL; must contain ``goods/<digits>``.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise.
        :raises AttributeError: when ``url`` has no ``goods/<digits>`` part
            (``re.search`` returns ``None``).
        :raises IndexError: when the page has no ``img.small`` images.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Short log line (timestamp, page title, url) for early exits.
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url)))

        def off_shelf():
            # Log the page, then wrap an off-shelf payload as a failed result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page is gone.
        if status_code == 404:
            return off_shelf()

        # Other errors. Checked before any page-content test so that an
        # error page is never mistaken for a sold-out product.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get(
                                      'SCERR', 'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        img_area = pqhtml('body div.left')
        prod_area = pqhtml('body .right')

        # Off shelf: no add-to-cart button, or no product area on the page.
        if not pqhtml('#buy') or not prod_area:
            return off_shelf()

        detail = dict()

        # Product ID (taken from the URL)
        productId = re.search(r'goods\/(\d+)[\/]?', url).group(1)
        detail['productId'] = productId
        detail['productSku'] = productId
        detail['productCode'] = productId

        # Brand
        brand = prod_area('p:last').text().replace(u'进入品牌', '').strip()
        detail['brand'] = brand

        # Name
        detail['name'] = prod_area('#kuriosity_code').prev().text()

        # Currency
        currency = 'CNY'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices
        price, listPrice = self.get_all_price(prod_area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Returns policy (left empty)
        detail['returns'] = ''

        # Description
        img_area('div:last').empty()  # clear the after-sales notes first
        detail['descr'] = prod_area('.text').text() + img_area(
            'div:first').text()

        # Color
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images
        imgs = [
            'https://www.k11kuriosity.com' + img.attr('src')
            for img in img_area('img.small').items()
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = self.get_sizes(prod_area)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # IP and port of the responding peer
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #25
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response, a page without the product grid, or a product with
        no selectable sizes produces an off-shelf payload; any other
        non-200 status produces an error payload.

        :param url: product page URL.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise.
        :raises IndexError: when the gallery contains no images.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Short log line (timestamp, page title, url) for early exits.
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url)))

        def off_shelf():
            # Log the page, then wrap an off-shelf payload as a failed result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off shelf: the page is gone.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message='status_code Error',
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        area = pqhtml('.container-full--small-only .grid')

        # Off shelf: the product grid is missing from the page.
        if not area:
            return off_shelf()

        detail = dict()

        # Name
        detail['name'] = ' '.join([
            area('h1.product__title').text(),
            area('h2.product__desc').text(),
            area('span.product__desc').text(),
        ])

        # Color
        detail['color'] = area('span[itemprop="color"]').text()

        # Images (lazy-loaded source first, plain src as fallback)
        imgsTmp = [
            img.attr('data-lazy') or img.attr('src')
            for img in area('.product-gallery__imgholder a img').items()
        ]
        detail['img'] = imgsTmp[0]
        detail['imgs'] = imgsTmp

        # Currency
        currency = area('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Current price
        price = area('meta[itemprop="price"]').attr('content')
        detail['price'] = price

        # Original price: first number in the old-price block, commas
        # removed. Falls back to the current price when the block is absent
        # or holds no number (the latter used to raise AttributeError).
        listPriceBlock = area('span.product__price--old')
        matched = None
        if listPriceBlock:
            matched = re.search(r'(\d[\.\d,]*)', listPriceBlock.text())
        detail['listPrice'] = (matched.group(1).replace(',', '')
                               if matched else price)

        productInfo = area('#product-info')
        design_text = productInfo('#design').text()
        delivery_text = productInfo('#delivery').text()

        # Description
        detail['descr'] = design_text

        # Brand
        detail['brand'] = 'REISS'

        # Product ID
        productId = area('span[itemprop="productID"]').text()
        detail['productId'] = productId

        # Color ID
        detail['colorId'] = productId

        # Delivery and returns (both read from the #delivery section)
        detail['delivery'] = delivery_text
        detail['returns'] = delivery_text

        # Design
        detail['designer'] = design_text

        # Size and fit
        detail['sizeFit'] = productInfo('#size').text()

        # Fabric
        detail['fabric'] = productInfo('#care').text()

        # Sizes. Items without an <input> are skipped. The availability
        # test uses hasClass: the old exact comparison of the whole class
        # attribute with 'size_not_available' could never match an element
        # selected by '.product-sizes__item', so inventory was never 0.
        sizes = []
        for opt in area('form .product-attributes .product-sizes '
                        '.product-sizes__item').items():
            size_input = opt('input')
            if not len(size_input):
                continue
            value = size_input.attr('value')
            unavailable = opt.hasClass('size_not_available')
            sizes.append(dict(
                name=opt.text(),
                sku=value,
                id=value,
                inventory=0 if unavailable
                else self.cfg.DEFAULT_STOCK_NUMBER))
        detail['sizes'] = sizes

        # Off shelf: no size can be selected.
        if not detail['sizes']:
            return off_shelf()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #26
0
    def detail(self, url):
        """Look a product up through the Gilt product API and build its record.

        The numeric product id is taken from ``url`` and the API is queried
        with ``self.cfg.api_key``. A 404 answer produces an off-shelf
        payload, any other non-200 answer an error payload.

        :param url: product page URL containing ``product/<digits>-``.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise.
        :raises AttributeError: when ``url`` does not match the expected
            pattern (``re.search`` returns ``None``).
        :raises ValueError: when the response has no ``420x560`` image set.

        Any exception raised here propagates to the caller unchanged (the
        former ``try/except: raise`` wrapper was a no-op).
        """
        product_id = re.search(r'product/(\d+)-\w', url).group(1)

        api = 'https://api.gilt.com/v1/products/{product_id}/detail.json'
        params = dict(apikey=self.cfg.api_key)

        # From here on ``url`` is the API endpoint, not the product page.
        url = api.format(product_id=product_id)
        # NOTE(review): no timeout is passed to requests.get here.
        resp = requests.get(url, params=params)

        # Unicode template: formatting unicode text into a byte-string
        # template raises UnicodeEncodeError on non-ASCII under Python 2.
        self.logger.debug(u'gilt product id:{0}, api response:{1}'.format(
            product_id, resp.text))

        if resp.status_code == 404:
            data = tool.get_off_shelf(code=resp.status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=resp.text)
            return tool.return_data(successful=False, data=data)

        if resp.status_code != 200:
            data = tool.get_error(
                code=resp.status_code,
                message='gilt api status_code error:{0}'.format(
                    resp.status_code),
                backUrl=resp.url,
                html=resp.text)
            return tool.return_data(successful=False, data=data)

        product_detail = resp.json()

        detail = dict()

        # Product ID
        productId = product_detail['id']
        detail['productId'] = productId

        # Brand
        brand = product_detail['brand']
        detail['brand'] = brand

        # Name (unicode template, same reason as the debug message above)
        detail['name'] = u'{0} {1}'.format(brand, product_detail['name'])

        # Currency: the API returns no currency unit, USD is assumed.
        currency = 'USD'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices, color and sizes
        price, listPrice, color, sizes = self.get_info(product_detail)

        detail['price'] = price
        detail['listPrice'] = listPrice

        # Color
        detail['color'] = color
        detail['colorId'] = productId

        # Images: only the 420x560 rendition is used.
        image_urls = product_detail['image_urls']
        if '420x560' not in image_urls:
            raise ValueError('get 420x560 imgs fail')

        imgs = [img_url['url'] for img_url in image_urls['420x560']]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = sizes

        # Description
        detail['descr'] = ' '.join(
            product_detail['content'].values()) or 'no descr'

        # Returns policy (left empty)
        detail['returns'] = ''

        # HTTP status code
        detail['status_code'] = 200

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # API endpoint that was queried (without the query parameters)
        detail['backUrl'] = url

        # IP and port of the responding peer
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        return tool.return_data(successful=True, data=detail)
Пример #27
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response, or a page whose first ``.stock_info`` text contains
        ``Sold Out``, produces an off-shelf payload; any other non-200
        status produces an error payload.

        :param url: product page URL.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise. Exceptions propagate unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Short log line (timestamp, page title, url) for early exits.
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url)))

        def off_shelf():
            # Log the page, then wrap an off-shelf payload as a failed result.
            log_page()
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Off shelf: the page is gone.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            payload = tool.get_error(code=status_code,
                                     message=self.cfg.GET_ERR.get(
                                         'SCERR', 'ERROR'),
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Preparation
        area = pqhtml('.fwd_page .fwd_content')

        # Off shelf: the page itself says the product is sold out.
        if 'Sold Out' in area('.stock_info:first').text():
            return off_shelf()

        detail = dict()

        # Brand (two page layouts are tried in turn)
        brand = area(
            '.product_info:first .designer_brand:first a:first').text()
        if not brand:
            brand = area(
                '.product_info:first .product-titles__brand a:first').text()
        detail['brand'] = brand

        # Name (h2 heading first, h1 as fallback)
        title = area('.product_info:first h2.product_name:first').text()
        if not title:
            title = area('.product_info:first h1.product_name:first').text()
        detail['name'] = brand + ' ' + title

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices
        price, listPrice = self.get_all_price(area('.eagle .prices'))
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Product ID
        productId = area('button.addtobag').attr('data-code')
        detail['productId'] = productId

        # Color (single-color label first, first dropdown option otherwise)
        color_name = area('.color_dd .one_sizeonly').text()
        if not color_name:
            color_name = area('.color_dd option:first').text()
        detail['color'] = color_name
        detail['colorId'] = productId

        # Images
        zoom_imgs = area(
            '.cycle-slideshow .product-detail-image-zoom img').items()
        imgs = [node.attr('data-zoom-image') for node in zoom_imgs]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('#details').text()

        # Brand description
        detail['brandDescr'] = area('#aboutdesigner').text()

        # Returns policy
        detail['returns'] = area('#free_ship_popup').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #28
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response produces an off-shelf payload and any other non-200
        status an error payload. Otherwise most fields are read from the
        page's ``itemprop`` spans, the rest from ``div.primary-content``.

        :param url: product page URL.
        :return: ``tool.return_data(successful=False, data=...)`` on the
            off-shelf / error paths, ``tool.return_data(successful=True,
            data=detail)`` otherwise. Exceptions propagate unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Short log line (timestamp, page title, url) for early exits.
            self.logger.info(json.dumps(
                dict(time=time.time(), title=pqhtml('title').text(), url=url)))

        def schema(prop):
            # Read the ``content`` attribute of <span itemprop="prop">.
            return pqhtml('span[itemprop="%s"]' % prop).attr('content')

        # Off shelf: the page is gone.
        if status_code == 404:
            log_page()
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            payload = tool.get_error(code=status_code,
                                     message=self.cfg.GET_ERR.get(
                                         'SCERR', 'ERROR'),
                                     backUrl=resp.url,
                                     html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Preparation
        area = pqhtml('div.primary-content')
        # The result is not used below; the call is kept as in the original.
        tool.get_domain(url)

        detail = dict()

        # Product ID
        productId = schema('productID')
        detail['productId'] = productId
        detail['productSku'] = productId
        detail['productCode'] = productId

        # Brand
        detail['brand'] = schema('brand')

        # Name
        detail['name'] = schema('name')

        # Currency
        currency = schema('priceCurrency')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices: the list price comes from get_all_price, the current
        # price is read from the itemprop span instead.
        _, listPrice = self.get_all_price(area)
        detail['price'] = schema('price')
        detail['listPrice'] = listPrice

        # Top-level and second-level category
        size_chart = area('a[data-bigpopup="sizeChart"]')
        detail['category'] = size_chart.attr('data-category')
        detail['subcategory'] = size_chart.attr('data-sub-category')

        # Description
        detail['descr'] = schema('description')

        # Details
        detail['detail'] = area('#collapseOne').text()

        # Returns policy
        detail['returns'] = area('#collapseFive').text()

        # Color
        detail['color'] = schema('color')
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images
        carousel = area('.product-image-carousel img.primary-image')
        detail['img'] = schema('image')
        detail['imgs'] = [node.attr('src') for node in carousel.items()]

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        # IP and port of the responding peer
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #29
0
    def detail(self, url):
        try:
            resp = self.session.get(url, timeout=self.cfg.REQUEST_TIME_OUT)
            # resp = requests.get(url,headers=self.session.headers,timeout=self.cfg.REQUEST_TIME_OUT)
            # print self.session.headers
            # resp = requests.get(url,headers=self.session.headers,timeout=20)

            status_code = resp.status_code
            pqhtml = PyQuery(resp.text or 'nothing')

            # print resp.headers

            #下架
            if status_code == 404:

                log_info = json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url))

                self.logger.info(log_info)

                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())

                return tool.return_data(successful=False, data=data)

            if status_code != 200:

                log_info = json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url))

                self.logger.info(log_info)

                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

                return tool.return_data(successful=False, data=data)

            Jtxt = pqhtml('script').text()

            #下架
            if 'productDetails' not in Jtxt:

                log_info = json.dumps(
                    dict(time=time.time(),
                         title=pqhtml('title').text(),
                         url=url))

                self.logger.info(log_info)

                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
                return tool.return_data(successful=False, data=data)

            pdata = self.get_pdata(Jtxt)

            #前期准备
            product = pdata['product']
            allLooks = product['allLooks']
            skuJournal = self.get_skuJournal(Jtxt)
            sizeAttribute = product['sizeAttribute'] if product.has_key(
                'sizeAttribute') else {
                    'values': [{
                        'id': 0,
                        'value': self.cfg.DEFAULT_ONE_SIZE
                    }]
                }
            colorAttribute = product['colorAttribute'] if product.has_key(
                'colorAttribute') else {
                    'values': [{
                        'id': 0,
                        'value': self.cfg.DEFAULT_ONE_COLOR
                    }]
                }

            #lookId 和 SkuArr 映射
            # lookId2SkuArr = dict([(look['productLookId'],[Id['skuId'] for Id in look['skus']]) for look in allLooks])
            #lookId 和 ImgArr 映射
            lookId2ImgArr = dict([(look['productLookId'], [
                'http:' + img['retinaQuickViewLookUrl']
                for img in look['images']
            ]) for look in allLooks])
            #lookId 和 现价 映射, 多颜色多价格
            lookId2Price = dict([(look['productLookId'],
                                  look['pricing']['maxSkuSalePrice']['raw'])
                                 for look in allLooks])
            #lookId 和 原价 映射,多颜色多价格
            lookId2ListPrice = dict([
                (look['productLookId'],
                 look['pricing']['maxSkuMsrpPrice']['raw'])
                for look in allLooks
            ])
            #lookId 和 skuArr 映射
            lookId2SkuArr = dict([(look['productLookId'],
                                   [Id['skuId'] for Id in look['skus']])
                                  for look in allLooks])
            #sizeId 和 名称 映射  #{2000: u's', 2001: u'm', 2002: u'l', 2003: u'xl', 2004: u'xxl'}
            sizeId2Name = dict([(size['id'], size['value'])
                                for size in sizeAttribute['values']])
            #colorId 和 名称 映射   #{1000: u'dark red', 1001: u'true navy'}
            colorId2Name = dict([(color['id'], color['value'])
                                 for color in colorAttribute['values']])
            #sku 和 有库存 映射
            sku2Inventory = self.get_sku2Inventory(skuJournal)
            #sku 和 无库存 映射
            sku2NoInventory = dict([
                (sku['skuId'], sku['numberUnitsForSale'])
                for sku in skuJournal['entries']
                if sku['type'] == 'inventory' and sku['status'] == ['X', 'U']
            ])
            #更新 库存 字典
            sku2Inventory.update(sku2NoInventory)
            #sku 和 现价 映射, 多size多价格.
            sku2Price = dict([(sku['skuId'], str(sku['salePrice']['raw']))
                              for sku in skuJournal['entries']
                              if sku['type'] == 'pricing'])
            #sku 和 原价 映射, 多size多价格.
            sku2ListPrice = dict([(sku['skuId'], str(sku['msrpPrice']['raw']))
                                  for sku in skuJournal['entries']
                                  if sku['type'] == 'pricing'])
            #skuId 和 sizeId 映射
            skuId2SizeId = dict([
                (sku['skuId'], sku['savId']) for sku in skuJournal['entries']
                if sku['type'] == 'associate' and sku['attribute'] == 'Size'
            ])
            #skuId 和 colorId 映射
            skuId2ColorId = dict([
                (sku['skuId'], sku['savId']) for sku in skuJournal['entries']
                if sku['type'] == 'associate' and sku['attribute'] == 'Color'
            ])
            #sku 和 sizeName 映射
            sku2SizeName = self.get_sku2SizeName(product, skuId2SizeId,
                                                 sizeId2Name)
            #sku 和 colorName 映射
            sku2ColorName = self.get_sku2ColorName(product, skuId2ColorId,
                                                   colorId2Name)
            #lookId 和 colorId 映射
            lookId2ColorId = self.get_lookIe2ColorId(lookId2SkuArr,
                                                     skuId2ColorId)
            #lookId 和 colorName 映射
            lookId2ColorName = self.get_lookIe2ColorName(
                lookId2SkuArr, sku2ColorName)
            #lookId 和 size集合 映射
            lookId2Sizes = self.get_lookId2Sizes(lookId2SkuArr, sku2SizeName,
                                                 sku2Inventory, sku2Price,
                                                 sku2ListPrice)

            # print(json.dumps(sku2Price))
            # print(json.dumps(sku2ListPrice))
            # print(json.dumps(lookId2SkuArr))
            # print(json.dumps(sku2ColorName))
            # print(json.dumps(lookId2ColorName))
            # print(json.dumps(sku2SizeName))
            detail = dict()

            #只获取当前连接中的sku值
            try:
                lookId = None
                if '-' in url[url.rindex('/'):]:
                    lookId = url[url.rindex('/') + 1:].split('-')[0]
                    lookIds = [int(lookId)]
            except Exception, e:
                pass

            #钥匙
            detail['keys'] = lookId2SkuArr.keys()

            #只获取链接中lookId
            # detail['keys'] = lookIds or lookId2SkuArr.keys()

            #颜色
            detail['color'] = lookId2ColorName
            detail['colorId'] = lookId2ColorId

            #产品ID
            detail['productId'] = product['productId']

            #图片
            detail['img'] = dict([(lookId, imgArr[0])
                                  for lookId, imgArr in lookId2ImgArr.items()])
            detail['imgs'] = lookId2ImgArr

            #规格
            detail['sizes'] = lookId2Sizes

            #价格
            detail['price'] = lookId2Price
            detail['listPrice'] = lookId2ListPrice

            #品牌
            brand = pdata['brand']['name']
            detail['brand'] = brand

            #名称
            detail['name'] = brand + ' ' + pdata['product']['name']

            #货币符号
            currency = pdata['defaultLook']['pricing']['currencyCode']
            detail['currency'] = currency
            detail['currencySymbol'] = tool.get_unit(currency)

            #退换货
            detail['returns'] = pdata['returnPolicy']['description']

            #描述
            dtxt = PyQuery(pdata['product']['description'])
            dtxt.remove('strong')
            detail['descr'] = dtxt.text()

            #HTTP状态码
            detail['status_code'] = status_code

            #状态
            detail['status'] = self.cfg.STATUS_SALE

            #返回链接
            detail['backUrl'] = resp.url

            log_info = json.dumps(
                dict(time=time.time(),
                     productId=detail['productId'],
                     name=detail['name'],
                     currency=detail['currency'],
                     price=detail['price'],
                     listPrice=detail['listPrice'],
                     url=url))

            self.logger.info(log_info)

            return tool.return_data(successful=True, data=detail)
Пример #30
0
    def detail(self, url):
        """Scrape a single product page and return its detail payload.

        The page is fetched through ``self.session`` with TLS verification
        disabled, then handled as follows:

        * HTTP 404: an off-shelf payload is built with
          ``tool.get_off_shelf``.
        * Any other non-200 status: an error payload is built with
          ``tool.get_error``.
        * HTTP 200: a ``detail`` dict is assembled from the data returned by
          ``self.get_pdata`` (fed with the text of the page's ``<script>``
          tags) and from the ``#container`` element of the page.

        :param url: product page URL to fetch.
        :returns: the result of ``tool.return_data``; called with
            ``successful=False`` for the two non-200 cases and
            ``successful=True`` otherwise.
        :raises IndexError: if no gallery image links are found on the page.

        Every other exception raised while fetching or parsing propagates to
        the caller unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # An empty body is replaced by a placeholder document before parsing.
        pqhtml = PyQuery(resp.text or 'nothing')
        domain = tool.get_domain(url)

        if status_code != 200:
            # Every failed fetch is logged with the page title and the URL.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))

            self.logger.info(log_info)

            if status_code == 404:
                # Product has been taken off the shelf.
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other HTTP error.
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())

            return tool.return_data(successful=False, data=data)

        # Preparation: embedded script text, main container, parsed page data.
        Jtxt = pqhtml('script').text()
        area = pqhtml('#container')
        pdata = self.get_pdata(Jtxt)

        # NOTE: no explicit out-of-stock check is made here; the stock value
        # is reported through the single entry of detail['sizes'] below.

        detail = dict()

        # Images: gallery hrefs are prefixed with the site domain.
        imgsTmp = [
            domain + a.attr('href')
            for a in area('form#addToCart ul.alt_imgs:first>li>a').items()
        ]
        # Raises IndexError when the gallery selector matches nothing.
        detail['img'] = imgsTmp[0]
        detail['imgs'] = imgsTmp

        # Name
        detail['name'] = pdata['product']['name']

        # Brand
        detail['brand'] = area('form#addToCart a#sameBrandProduct').text()

        # Prices: current (sale) price and list price.
        detail['price'] = pdata['product']['unit_sale_price']
        detail['listPrice'] = pdata['product']['unit_price']

        # Currency code and its symbol.
        currency = pdata['product']['currency']
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Product id
        productId = pdata['product']['id']
        detail['productId'] = productId

        # Color: a single default color, keyed by the product id.
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = productId

        # Sizes: a single default size carrying the product's stock value.
        detail['sizes'] = [
            dict(name=self.cfg.DEFAULT_ONE_SIZE,
                 inventory=pdata['product']['stock'],
                 sku=productId)
        ]

        # Description: short description followed by the first info tab.
        detail['descr'] = area('.prod_desc').text() + ' ' + area(
            'div#info_tabs>div.wrap>div#tab1_info').text()

        # Details
        detail['detail'] = area('#tab1_info').text()

        # Brand description
        detail['brandDescr'] = area('#tab2_info').text()

        # Warranty note
        detail['note'] = area('#tab5_info').text()

        # Delivery
        detail['delivery'] = area('#shippingData').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Back link: the requested URL (not resp.url) is stored here.
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)