Пример #1
0
    def detail(self, url):
        """Dispatch *url* to the scraper matching its sasa.com host.

        The value of ``tool.get_domain(url)`` is stored on ``self.domain``.
        The call is forwarded to ``detail_by_hk`` when the domain contains
        'web1.sasa.com' and to ``detail_by_www`` when it contains
        'www.sasa.com'; the handler's return value is returned unchanged.
        When neither host matches, ``None`` is returned.

        Exceptions are not handled here and propagate to the caller.
        """
        self.domain = tool.get_domain(url)

        # Pick the handler first, then make a single call at the end.
        if 'web1.sasa.com' in self.domain:
            handler = self.detail_by_hk
        elif 'www.sasa.com' in self.domain:
            handler = self.detail_by_www
        else:
            # Unknown host: nothing to scrape.
            return None

        return handler(url)
Пример #2
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        The page is requested with ``verify=False``.  A 404 response, a page
        without ``#productDetailsWrapper`` or one whose ``#productPrice``
        text contains 'SOLD OUT' yields an unsuccessful off-shelf result;
        any other non-200 status yields an unsuccessful error result.
        Otherwise the scraped fields are returned through
        ``tool.return_data(successful=True, data=detail)``.

        Exceptions are not handled here and propagate to the caller.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps({
                'time': time.time(),
                'title': pqhtml('title').text(),
                'url': url,
            }))

        def off_shelf():
            # Shared "product is off the shelf" result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('#productDetailsWrapper')
        # The domain is not used below; the call is retained unchanged.
        tool.get_domain(url)
        script_text = pqhtml('script').text()

        # Off the shelf: marked sold out, or no product area on the page.
        if 'SOLD OUT' in area('#productPrice').text() or not area:
            return off_shelf()

        detail = {}

        # Brand, taken from the inline script text.
        brand_match = re.search(r'FL.setup.brand = "(.*?)"', script_text,
                                re.DOTALL)
        detail['brand'] = brand_match.groups()[0]

        # Name.
        # NOTE(review): the name uses 'h1#title' while the product id below
        # uses 'h1.title' - confirm both selectors are intended.
        detail['name'] = area('h1#title').text()

        # Currency is fixed for this site.
        currency = 'USD'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices.
        price, list_price = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = list_price

        # Colours; every colour id maps to itself.
        colors = self.get_colors(area)
        detail['color'] = colors
        color_ids = colors.keys()
        detail['colorId'] = dict(zip(color_ids, color_ids))

        # Images: first image overall, or first image per colour id.
        imgs = self.get_imgs(area, pqhtml)
        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = dict(
                (cid, img_list[0]) for cid, img_list in imgs.items())
        detail['imgs'] = imgs

        # Product id.
        detail['productId'] = area('h1.title').attr('data-productitemid')

        # Sizes.
        detail['sizes'] = self.get_sizes(area)

        # Keys.
        detail['keys'] = colors.keys()

        # Description.
        detail['descr'] = area('div#productDescription').text()

        # HTTP status code.
        detail['status_code'] = status_code

        # Sale status.
        detail['status'] = self.cfg.STATUS_SALE

        # URL of the response.
        detail['backUrl'] = resp.url

        self.logger.info(json.dumps({
            'time': time.time(),
            'productId': detail['productId'],
            'name': detail['name'],
            'currency': detail['currency'],
            'price': detail['price'],
            'listPrice': detail['listPrice'],
            'url': url,
        }))

        return tool.return_data(successful=True, data=detail)
Пример #3
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        The page is requested with ``verify=False``.  A 404 response or a
        page without ``#goodsInfo`` yields an unsuccessful off-shelf result;
        any other non-200 status yields an unsuccessful error result.
        Otherwise the scraped fields are returned through
        ``tool.return_data(successful=True, data=detail)``.

        ``detail['detail']`` is the meta description followed by the text of
        ``.desc-area``; a missing part is treated as an empty string so that
        a page without a description meta tag no longer raises TypeError.

        Exceptions are not handled here and propagate to the caller.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        def off_shelf():
            # Shared "product is off the shelf" result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('#goodsInfo')
        # NOTE(review): the domain is not used below; the call is kept in
        # case get_domain validates the URL - confirm and drop if not.
        tool.get_domain(url)

        # Off the shelf: no product area on the page.
        if not area:
            return off_shelf()

        detail = dict()

        # Product id, also reported as the SKU.
        productId = pqhtml('#goodsForm input#bskGodGodNo').attr('value')
        detail['productId'] = productId
        detail['productSku'] = productId
        detail['productCode'] = area('.prd-code').text()

        # Brand.
        brand = pqhtml('#goodsForm input#brndNm').attr('value')
        detail['brand'] = brand

        # Name: brand followed by the goods name.
        detail['name'] = u'{0} {1}'.format(
            brand, pqhtml('#goodsForm input#godNm').attr('value'))

        # Currency and prices.
        currency, price, listPrice = self.get_currency_prices(pqhtml, area)
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        detail['price'] = price
        detail['listPrice'] = listPrice

        # Description (None when the meta tag is missing).
        description = pqhtml('meta[name="description"]').attr('content')
        detail['descr'] = description

        # Long detail text.  Either part may be missing, so both are
        # coalesced to '' before concatenating.
        detail['detail'] = (description or '') + (
            area('.desc-area').text() or '')

        # Colour.
        # NOTE(review): the result of get_color is not used; the default
        # colour constants are reported instead.  The call is kept in case
        # it has side effects - confirm.
        self.get_color(area)
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images.
        imgs = [
            img.attr('src')
            for img in pqhtml('#prdImgWrap .prdImg ul>li>img').items()
        ]
        detail['img'] = pqhtml(
            'meta[property="og:image"][name="og_image"]').attr('content')
        detail['imgs'] = imgs

        # Sizes.
        detail['sizes'] = self.get_sizes(area)

        # HTTP status code.
        detail['status_code'] = status_code

        # Sale status.
        detail['status'] = self.cfg.STATUS_SALE

        # URL of the response.
        detail['backUrl'] = resp.url

        # IP and port of the response, when available.
        # NOTE(review): `peer` lives on the private `_original_response`
        # object and is presumably set elsewhere in the project - confirm.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #4
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        The page is requested with ``verify=False``.  A 404 response yields
        an unsuccessful off-shelf result and any other non-200 status an
        unsuccessful error result.  No further sold-out check is made on a
        200 page.  Otherwise the scraped fields are returned through
        ``tool.return_data(successful=True, data=detail)``.

        Raises:
            ValueError: when the currency symbol returned by
                ``get_price_info`` is not '$'.

        Other exceptions are not handled here and propagate to the caller.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps({
                'time': time.time(),
                'title': pqhtml('title').text(),
                'url': url,
            }))

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('#theater')
        # The domain is not used below; the call is retained unchanged.
        tool.get_domain(url)
        # NOTE(review): product data is read from the script tags selected
        # by 'script:gt(20)'; the index is presumably site specific.
        pdata = self.get_pdata(pqhtml('script:gt(20)'))

        detail = {}

        # Brand.
        detail['brand'] = area('.brand').text()

        # Name.
        detail['name'] = area('h1:first').text()

        symbol, price, list_price = self.get_price_info(pdata)

        # Only dollar prices are accepted.
        if symbol != '$':
            raise ValueError('currencySymbol is not USD')

        # Currency.
        currency = 'USD'
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices.
        detail['price'] = price
        detail['listPrice'] = list_price

        # Colours; every colour id maps to itself.
        color = self.get_color(pdata)
        detail['color'] = color
        detail['colorId'] = dict((cid, cid) for cid in color.keys())

        # Images.
        img, imgs = self.get_imgs(pdata)
        detail['img'] = img
        detail['imgs'] = imgs

        # Product id.
        detail['productId'] = pqhtml('input[name="productId"]').attr('value')

        # Sizes.
        sizes = self.get_sizes(pdata)
        detail['sizes'] = sizes

        # Description.
        detail['descr'] = area('.description').text()

        # Keys present in both the image map and the size map (a set).
        detail['keys'] = set(img.keys()).intersection(sizes.keys())

        # HTTP status code.
        detail['status_code'] = status_code

        # Sale status.
        detail['status'] = self.cfg.STATUS_SALE

        # URL of the response.
        detail['backUrl'] = resp.url

        # IP and port of the response, when available.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(lambda part: str(part), peer))

        self.logger.info(json.dumps({
            'time': time.time(),
            'productId': detail['productId'],
            'name': detail['name'],
            'currency': detail['currency'],
            'price': detail['price'],
            'listPrice': detail['listPrice'],
            'url': url,
        }))

        return tool.return_data(successful=True, data=detail)
Пример #5
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        The page is requested with ``verify=False``.  A 404 response yields
        an unsuccessful off-shelf result and any other non-200 status an
        unsuccessful error result.  No further sold-out check is made on a
        200 page.  Otherwise the scraped fields, mostly read from
        ``span[itemprop=...]`` elements, are returned through
        ``tool.return_data(successful=True, data=detail)``.

        Exceptions are not handled here and propagate to the caller.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps({
                'time': time.time(),
                'title': pqhtml('title').text(),
                'url': url,
            }))

        def itemprop(prop):
            # Content attribute of the span carrying the given itemprop.
            return pqhtml('span[itemprop="' + prop + '"]').attr('content')

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('div.primary-content')
        # The domain is not used below; the call is retained unchanged.
        tool.get_domain(url)

        detail = {}

        # Product id, also reported as SKU and product code.
        product_id = itemprop('productID')
        detail['productId'] = product_id
        detail['productSku'] = product_id
        detail['productCode'] = product_id

        # Brand.
        detail['brand'] = itemprop('brand')

        # Name.
        detail['name'] = itemprop('name')

        # Currency.
        currency = itemprop('priceCurrency')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices: only the list price of get_all_price is used; the current
        # price comes from the itemprop span instead.
        _unused_price, list_price = self.get_all_price(area)
        detail['price'] = itemprop('price')
        detail['listPrice'] = list_price

        # Category and sub-category, both read from the size-chart link.
        detail['category'] = area('a[data-bigpopup="sizeChart"]').attr(
            'data-category')
        detail['subcategory'] = area('a[data-bigpopup="sizeChart"]').attr(
            'data-sub-category')

        # Description.
        detail['descr'] = itemprop('description')

        # Long detail text.
        detail['detail'] = area('#collapseOne').text()

        # Returns policy text.
        detail['returns'] = area('#collapseFive').text()

        # Colour.
        detail['color'] = itemprop('color')
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images.
        carousel = area('.product-image-carousel img.primary-image')
        imgs = [node.attr('src') for node in carousel.items()]
        detail['img'] = itemprop('image')
        detail['imgs'] = imgs

        # Sizes.
        detail['sizes'] = self.get_sizes(area)

        # HTTP status code.
        detail['status_code'] = status_code

        # Sale status.
        detail['status'] = self.cfg.STATUS_SALE

        # URL of the response.
        detail['backUrl'] = resp.url

        # IP and port of the response, when available.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(lambda part: str(part), peer))

        self.logger.info(json.dumps({
            'time': time.time(),
            'productId': detail['productId'],
            'name': detail['name'],
            'currency': detail['currency'],
            'price': detail['price'],
            'listPrice': detail['listPrice'],
            'url': url,
        }))

        return tool.return_data(successful=True, data=detail)
Пример #6
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        The page is requested with ``verify=False``.  A 404 response, or
        product data whose ``hasOrderableVariants`` entry is falsy, yields an
        unsuccessful off-shelf result; any other non-200 status yields an
        unsuccessful error result.  Otherwise the scraped fields are returned
        through ``tool.return_data(successful=True, data=detail)``.

        The list price is the first number found in the text of
        ``.pricenotebucket``; when that text is empty or holds no number the
        current price is used instead.

        Exceptions are not handled here and propagate to the caller.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        def off_shelf():
            # Shared "product is off the shelf" result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('#container')
        domain = tool.get_domain(url)
        pdata = self.get_pdata(pqhtml, domain)

        # Off the shelf: no orderable variant.
        if not pdata['hasOrderableVariants']:
            return off_shelf()

        detail = dict()

        # Brand.
        brand = area('.product-meta').attr('data-brand')
        detail['brand'] = brand

        # Name.
        detail['name'] = area('.product-meta').attr('data-productname')

        # Currency: three-letter code taken from the inline script text.
        currency = re.search(r's\["currencyCode"\]="(\w{3})";',
                             pqhtml('script').text()).groups()[0]
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price and sizes.
        price, sizes = self.get_info(pdata)

        # Current price.
        detail['price'] = price

        # List price.  The previous pattern had no capture group, so
        # `.groups()[0]` raised IndexError whenever a price note was
        # present; match the whole number and fall back to the current
        # price when there is none.
        ptxt = area('.pricenotebucket').text()
        price_match = re.search(r'\d+(?:\.\d+)?', ptxt) if ptxt else None
        listPrice = price_match.group(0) if price_match else price

        detail['listPrice'] = listPrice

        # Colours; every colour id maps to itself.
        status, color, imgs = self.get_color(pdata)
        detail['color'] = color
        detail['colorId'] = dict((key, key) for key in color.keys())

        # Keys.
        detail['keys'] = list(color.keys())

        # Images: first image overall, or first image per colour id.
        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = dict(
                (cId, imgArr[0]) for cId, imgArr in imgs.items())
        detail['imgs'] = imgs

        # Product id.
        productId = area('.product-meta').attr('data-pid')
        detail['productId'] = productId

        # Sizes.
        detail['sizes'] = sizes

        # Description.
        detail['descr'] = area(
            'section.product-details .longdescription').text()

        # Long detail text.
        detail['detail'] = area('section.product-details').text()

        # HTTP status code.
        detail['status_code'] = status_code

        # Status as reported by get_color.
        detail['status'] = status

        # URL of the response.
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #7
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        ``self.domain`` is set from ``tool.get_domain(url)`` and the page is
        requested with ``verify=False``.  A 404 response, a page without
        ``#content>#productContainer`` or one containing
        ``.productButtons #disabledAddtobasket`` yields an unsuccessful
        off-shelf result; any other non-200 status yields an unsuccessful
        error result.  Otherwise the scraped fields are returned through
        ``tool.return_data(successful=True, data=detail)``.

        Exceptions are not handled here and propagate to the caller.
        """
        self.domain = tool.get_domain(url)

        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps({
                'time': time.time(),
                'title': pqhtml('title').text(),
                'url': url,
            }))

        def off_shelf():
            # Shared "product is off the shelf" result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('#content>#productContainer')
        # The product data is not used below; the call is retained unchanged.
        self.get_pdata(pqhtml)

        # Off the shelf: no product area, or the disabled add-to-basket
        # element is present.
        if not area or area('.productButtons #disabledAddtobasket'):
            return off_shelf()

        detail = {}

        # Brand is fixed for this site.
        detail['brand'] = 'COS'

        # Name.
        detail['name'] = area('.productInfo h1:first').text()

        # Currency.
        currency = pqhtml('meta[property="og:price:currency"]').attr(
            'content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices.
        price, list_price = self.get_all_price(pqhtml, area)
        detail['price'] = price
        detail['listPrice'] = list_price

        # Colours; every colour id maps to itself.
        color = self.get_color(area)
        detail['color'] = color
        detail['colorId'] = dict((key, key) for key in color.keys())

        # Images: first image overall, or first image per colour id.
        imgs = self.get_imgs(area)
        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = dict(
                (cid, img_list[0]) for cid, img_list in imgs.items())
        detail['imgs'] = imgs

        # Keys.
        detail['keys'] = color.keys()

        # Product id.
        detail['productId'] = area('input[data-product-identifier!=""]').attr(
            'data-product-identifier')

        # Sizes.
        detail['sizes'] = self.get_sizes(area)

        # Description and returns text.
        # NOTE(review): both fields read the same element (first <dd>), so
        # 'returns' always equals 'descr' - confirm the intended selector.
        first_dd = '.productInfo>.infowrap>dl>dd:first'
        detail['descr'] = area(first_dd).text()
        detail['returns'] = area(first_dd).text()

        # HTTP status code.
        detail['status_code'] = status_code

        # Sale status.
        detail['status'] = self.cfg.STATUS_SALE

        # Back URL.
        # NOTE(review): this is the requested url, whereas the failure
        # results above report resp.url - confirm which one is intended.
        detail['backUrl'] = url

        self.logger.info(json.dumps({
            'time': time.time(),
            'productId': detail['productId'],
            'name': detail['name'],
            'currency': detail['currency'],
            'price': detail['price'],
            'listPrice': detail['listPrice'],
            'url': url,
        }))

        return tool.return_data(successful=True, data=detail)
Пример #8
0
    def detail(self, url):
        """Scrape one product page and return the packaged result.

        The page is requested with ``verify=False``.  A 404 response or a
        page without ``#pdp-page`` yields an unsuccessful off-shelf result;
        any other non-200 status yields an unsuccessful error result.
        Otherwise the scraped fields are returned through
        ``tool.return_data(successful=True, data=detail)``.

        The reported price is the one returned by ``get_sizes``; the list
        price comes from ``get_all_price``.

        Exceptions are not handled here and propagate to the caller.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        # A placeholder document is parsed when the body is empty.
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_page():
            # Log timestamp, page title and requested URL as one JSON line.
            self.logger.info(json.dumps({
                'time': time.time(),
                'title': pqhtml('title').text(),
                'url': url,
            }))

        def off_shelf():
            # Shared "product is off the shelf" result.
            log_page()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf: the page no longer exists.
        if status_code == 404:
            return off_shelf()

        # Any other non-200 status is reported as an error.
        if status_code != 200:
            log_page()
            data = tool.get_error(code=status_code,
                                  message=self.cfg.GET_ERR.get('SCERR',
                                                               'ERROR'),
                                  backUrl=resp.url,
                                  html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation.
        area = pqhtml('#pdp-page')
        # The domain is not used below; the call is retained unchanged.
        tool.get_domain(url)

        # Off the shelf: no product area on the page.
        if not area:
            return off_shelf()

        detail = {}

        # Product id: the style number up to its first underscore.
        style_number = pqhtml('img[data-stylenumber!=""]').attr(
            'data-stylenumber')
        detail['productId'] = style_number.split('_')[0]

        # Brand is fixed for this site.
        detail['brand'] = 'Lululemon'

        # Name.
        detail['name'] = area('h1.OneLinkNoTx').text()

        # Currency.
        currency = pqhtml('input#currencyCode').attr('value').strip()
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices.  The price stored here is provisional: it is replaced
        # further down by the price returned from get_sizes.
        price, list_price = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = list_price

        # Description.
        detail['descr'] = self.get_descr(pqhtml, area)

        # Long detail text.
        detail['detail'] = area('#fabric').text()

        # Returns policy text (always empty here).
        detail['returns'] = ''

        color_driver, color_count = self.get_pdata(pqhtml)

        # Colours; every colour key maps to itself.
        img, imgs, color = self.get_color(area, color_count)
        detail['color'] = color
        detail['colorId'] = dict((key, key) for key in color)

        # Images.
        detail['img'] = img
        detail['imgs'] = imgs

        # Sizes, plus the final price.
        sizes, price = self.get_sizes(color_driver)
        detail['sizes'] = sizes
        detail['price'] = price

        # Keys are only reported when the colours come as a dict.
        if isinstance(color, dict):
            detail['keys'] = list(color)

        # HTTP status code.
        detail['status_code'] = status_code

        # Sale status.
        detail['status'] = self.cfg.STATUS_SALE

        # URL of the response.
        detail['backUrl'] = resp.url

        # IP and port of the response, when available.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(lambda part: str(part), peer))

        self.logger.info(json.dumps({
            'time': time.time(),
            'productId': detail['productId'],
            'name': detail['name'],
            'currency': detail['currency'],
            'price': detail['price'],
            'listPrice': detail['listPrice'],
            'url': url,
        }))

        return tool.return_data(successful=True, data=detail)
Пример #9
0
    def detail(self, url):
        """Fetch a product page and assemble its detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled) and parses the response body with PyQuery.

        Returns the value of ``tool.return_data``:

        * ``successful=False`` with a ``tool.get_off_shelf`` payload when the
          response status is 404;
        * ``successful=False`` with a ``tool.get_error`` payload for any
          other non-200 status;
        * ``successful=True`` with the assembled ``detail`` dict otherwise.

        Any exception raised while fetching or parsing propagates unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(build_payload, message):
            # Shared failure path: log the page title and URL, then wrap the
            # payload produced by ``build_payload`` as an unsuccessful result.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            data = build_payload(code=status_code,
                                 message=message,
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors
        if status_code != 200:
            return reject(tool.get_error,
                          self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        # Preparation: drop <style> nodes before extracting data.
        pqhtml.remove('style')
        area = pqhtml('#overall_content')
        pdata = self.get_pdata(pqhtml)

        # NOTE: an availability check on div[itemprop="availability"] used to
        # live here but was disabled in the original code.

        detail = dict()

        # Brand (hard-coded)
        brand = 'TouchOfModern'
        detail['brand'] = brand

        # Name
        detail['name'] = brand + ' ' + pdata['name']

        # Currency
        currency = pqhtml('meta[property="og:price:currency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        detail['price'] = pdata['price']

        # Color
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = pdata['id']

        # Image set
        detail['img'] = area('.big_image_wrapper a').attr('href')
        detail['imgs'] = [
            e.attr('href')
            for e in area('div[class="product-image-container"] a').items()
        ]

        # Product ID
        productId = pdata['id']
        detail['productId'] = productId

        # Sizes; the list price falls back to the page price when missing.
        listPrice, sizes = self.get_sizes(area)
        detail['sizes'] = sizes
        detail['listPrice'] = listPrice or pdata['price']

        # Video
        if len(area('.product-video-container')) > 0:
            detail['video'] = self.get_video(area)

        # Description and details share the same section text, so query once.
        section_text = area('.product-details-section').text()
        detail['descr'] = section_text
        detail['detail'] = section_text

        # Returns / exchanges
        # NOTE(review): the selector ends in "listt"; possibly a typo --
        # confirm against the page markup before changing it.
        detail['returns'] = area('.shipping-details-listt').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL (the requested URL, not resp.url)
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #10
0
    def detail(self, url):
        """Fetch a product page plus its sub-data and build the detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled), parses it with PyQuery, then loads extra product data via
        ``self.get_subData``. Also stores ``tool.get_domain(url)`` on
        ``self.domain``.

        Returns the value of ``tool.return_data``:

        * ``successful=False`` with a ``tool.get_off_shelf`` payload when the
          status is 404 or the sub-data ``errorMessage`` contains
          ``'Product may be unavailable'``;
        * ``successful=False`` with a ``tool.get_error`` payload for any
          other non-200 status;
        * ``successful=True`` with the assembled ``detail`` dict otherwise
          (``detail['keys']`` is a ``set``).

        Any exception raised while fetching or parsing propagates unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(build_payload, message):
            # Shared failure path: log the page title and URL, then wrap the
            # payload produced by ``build_payload`` as an unsuccessful result.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            data = build_payload(code=status_code,
                                 message=message,
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors
        if status_code != 200:
            return reject(tool.get_error,
                          self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        # Preparation
        self.domain = tool.get_domain(url)
        pdata = self.get_pdata(pqhtml)

        productId = pdata['id']
        subData = self.get_subData(productId)

        # Off the shelf, as reported by the sub-data response
        if 'Product may be unavailable' in subData.get('errorMessage', ''):
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        detail = dict()

        # Product ID
        detail['productId'] = productId
        detail['productSku'] = productId
        detail['productCode'] = productId

        # Brand
        brand = subData['productThumbnail']['brand']
        detail['brand'] = brand

        # Name; falls back to the text of the short-description markup.
        detail['name'] = (pdata.get('name', '')
                          or PyQuery(pdata['shortDescription']).text())

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        price, listPrice = self.get_all_price(pdata)
        detail['price'] = price or listPrice
        detail['listPrice'] = listPrice

        # Description. The original assigned '#memberProductDetails' and then
        # unconditionally overwrote it; keep the winning selector first and
        # use the other one only as a fallback when it yields nothing.
        detail['descr'] = (pqhtml('.product-details-content').text()
                           or pqhtml('#memberProductDetails').text())

        # Image set: either a flat list or a mapping of color -> image list.
        imgs = self.get_imgs(subData)

        if isinstance(imgs, list):
            detail['img'] = imgs[0]
        else:
            detail['img'] = {cid: arr[0] for cid, arr in imgs.items()}
        detail['imgs'] = imgs

        # Sizes
        sizes = self.get_sizes_by_subdata(subData['availabilityMap'])
        detail['sizes'] = sizes

        # Keys come from price when it is per-color, because some colors
        # present in sizes are missing from price (2016-11-27).
        key_source = price if isinstance(price, dict) else sizes
        detail['keys'] = set(key_source.keys())

        # Some colors have no images; borrow an arbitrary existing image list
        # for them (2016-11-27). ``detail['imgs']`` is the same object as
        # ``imgs``, so the fallback is read once per color to keep the cover
        # image consistent with the list assigned to that color.
        if isinstance(price, dict):
            for colorName in price:
                if colorName not in detail['imgs']:
                    fallback = list(imgs.values())[0]
                    detail['imgs'][colorName] = fallback
                    detail['img'][colorName] = fallback[0]

        # Color
        color = {name: name for name in sizes.keys()}
        detail['color'] = color
        detail['colorId'] = color

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL
        detail['backUrl'] = resp.url

        # IP and port of the responding peer
        if resp.raw._original_response.peer:
            detail['ip_port'] = ':'.join(
                map(lambda x: str(x), resp.raw._original_response.peer))

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #11
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled) and parses it with PyQuery. Status-code screening is
        delegated to ``self.is_ok_status_code``; when that check fails, the
        second element of its result is returned as-is. When neither
        ``self.get_pdata`` nor the ``#divSelectOpt input`` lookup yields
        anything, an off-shelf payload is returned via
        ``tool.return_data(successful=False, ...)``. Otherwise the assembled
        ``detail`` dict is returned via
        ``tool.return_data(successful=True, ...)``.
        """
        response = self.session.get(url, verify=False)

        status_code = response.status_code
        page = PyQuery(response.text or 'nothing')

        # Status screening
        passed, data = self.is_ok_status_code(status_code, page, url, response)
        if not passed:
            return data

        # Preparation
        content = page('#contentArea')
        domain = tool.get_domain(url)  # result unused; call kept as in the original
        productId = content('form input[name="prodCode"]').attr('value')

        pdata = self.get_pdata(productId)

        # Off the shelf: no product data and no selectable options
        if not (pdata or content('#divSelectOpt input')):
            self.logger.info(json.dumps(
                dict(time=time.time(), title=page('title').text(), url=url)))

            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=response.url,
                                      html=page.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Product ID (the same value serves as SKU and code)
        detail = {
            'productId': productId,
            'productSku': productId,
            'productCode': productId,
        }

        # Brand and name
        detail['brand'] = content('.detailInfo .infoTable .titleWrap a:first').text()
        detail['name'] = content('.detailInfo .infoTable .titleWrap').text()

        # Price and currency
        currency, price, listPrice = self.get_all_price(content)
        detail.update(price=price, listPrice=listPrice, currency=currency)
        detail['currencySymbol'] = tool.get_unit(currency)

        # Description, with a placeholder text when the page has none
        description = content('.infoTable .optTable_1').text()
        if not description:
            description = u'没有获取到描述'
        detail['descr'] = description

        # Color (single default color)
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Image set: site-relative paths are prefixed with the site root
        gallery = []
        for node in content('.detailArea img').items():
            gallery.append('http://www.sheisback.com' + node.attr('src'))
        cover_path = content('.detailImg img:first').attr('data-zoom-image')
        detail['img'] = 'http://www.sheisback.com' + cover_path
        detail['imgs'] = gallery

        # Sizes
        detail['sizes'] = self.get_sizes(pdata)

        # HTTP status code, sale status and return URL
        detail['status_code'] = status_code
        detail['status'] = self.cfg.STATUS_SALE
        detail['backUrl'] = response.url

        # IP and port of the responding peer
        if response.raw._original_response.peer:
            peer = response.raw._original_response.peer
            detail['ip_port'] = ':'.join([str(part) for part in peer])

        summary = dict(time=time.time(),
                       productId=detail['productId'],
                       name=detail['name'],
                       currency=detail['currency'],
                       price=detail['price'],
                       listPrice=detail['listPrice'],
                       url=url)
        self.logger.info(json.dumps(summary))

        return tool.return_data(successful=True, data=detail)
Пример #12
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled), parses it with PyQuery and reads the ``dataLayer`` object
        embedded in the page's script text.

        Returns the value of ``tool.return_data``:

        * ``successful=False`` with a ``tool.get_off_shelf`` payload when the
          status is 404, the availability meta tag is not ``InStock``, or
          the ``.product-simple`` text contains ``'Sold Out'``;
        * ``successful=False`` with a ``tool.get_error`` payload for any
          other non-200 status;
        * ``successful=True`` with the assembled ``detail`` dict otherwise.

        Raises:
            AssertionError: if ``dataLayer['productDetails']`` does not hold
                exactly one product.

        Any other exception raised while fetching or parsing propagates
        unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')
        domain = tool.get_domain(url)

        def reject(build_payload, message):
            # Shared failure path: log the page title and URL, then wrap the
            # payload produced by ``build_payload`` as an unsuccessful result.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            data = build_payload(code=status_code,
                                 message=message,
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors
        if status_code != 200:
            return reject(tool.get_error,
                          self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        # Preparation
        Jtxt = pqhtml('script').text()
        area = pqhtml('.product-area')

        # The embedded object uses single quotes; swap them so it parses as
        # JSON.
        dataLayer = json.loads(
            re.search(r'dataLayer = \[(\{.*?\})\];', Jtxt,
                      re.DOTALL).groups()[0].replace('\'', '"'))

        # The site is assumed to expose one product with one color per page.
        # Multiple colors / sizes / color IDs are handled, but multiple
        # colors with separate image sets are not (see get_imgs). Raised
        # explicitly rather than via ``assert`` so the guard survives
        # ``python -O``.
        if len(dataLayer['productDetails']) != 1:
            raise AssertionError('coggles too many products , fix this bug')

        productId, pdata = self.get_pdata(domain, dataLayer)

        instock = area('meta[itemprop="availability"]').attr(
            'content') == 'InStock'

        # Off the shelf
        if not instock or 'Sold Out' in area('.product-simple').text():
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        detail = dict()

        # Images
        imgsTmp = self.get_imgs(pdata, pqhtml)
        detail['img'] = imgsTmp[0]
        detail['imgs'] = imgsTmp

        # Name
        detail['name'] = area('.product-title-wrap').text()

        # Brand
        detail['brand'] = re.search(r'productBrand: "(.*?)",', Jtxt,
                                    re.DOTALL).groups()[0]

        # Price
        price, listPrice = self.get_all_price(area, pdata)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Currency and its symbol
        currency = dataLayer['pageAttributes'][0]['currency']
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Product ID (taken from the page form, not from get_pdata)
        prodId = pqhtml('input[name="prodId"]').attr('value')
        detail['productId'] = prodId

        # Color
        color = self.get_color(pdata)
        detail['color'] = color
        detail['colorId'] = {key: key for key in color.keys()}

        # Keys
        detail['keys'] = color.keys()

        # Sizes
        detail['sizes'] = self.get_sizes(productId, pdata)

        # Description: both description blocks, with single quotes removed
        detail['descr'] = (
            area('div[itemprop="product-description"]').text().replace(
                '\'', '')
            + area('div[itemprop="description"]').text().replace('\'', ''))

        # Note
        # NOTE(review): `> 1` skips pages that have exactly one
        # .promotionalmessage element -- confirm whether `> 0` was intended.
        if len(area('.promotionalmessage')) > 1:
            detail['note'] = area('.promotionalmessage').text()

        # Details
        detail['detail'] = area('.js-prodInfo-details').text()

        # Returns and delivery information
        detail['returns'] = area(
            'div.product-delivery-returns').text().replace('\'', '')

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL (the requested URL, not resp.url)
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #13
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled) and parses it with PyQuery.

        Returns the value of ``tool.return_data``:

        * ``successful=False`` with a ``tool.get_off_shelf`` payload when the
          status is 404 or the page contains
          ``#itemOptions #addToBasketDisabled``;
        * ``successful=False`` with a ``tool.get_error`` payload for any
          other non-200 status;
        * ``successful=True`` with the assembled ``detail`` dict otherwise.

        Exceptions are not handled here; they propagate to the caller.
        """
        response = self.session.get(url, verify=False)

        status_code = response.status_code
        page = PyQuery(response.text or 'nothing')

        def log_page():
            # Record the timestamp, page title and URL of a rejected lookup.
            entry = dict(time=time.time(),
                         title=page('title').text(),
                         url=url)
            self.logger.info(json.dumps(entry))

        def off_shelf():
            # Log first, then build and wrap the off-shelf payload.
            log_page()
            payload = tool.get_off_shelf(code=status_code,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Off the shelf
        if status_code == 404:
            return off_shelf()

        # Other errors
        if status_code != 200:
            log_page()
            payload = tool.get_error(code=status_code,
                                     message=self.cfg.GET_ERR.get(
                                         'SCERR', 'ERROR'),
                                     backUrl=response.url,
                                     html=page.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Off the shelf: the add-to-basket control is disabled
        if len(page('#itemOptions #addToBasketDisabled')) > 0:
            return off_shelf()

        # Preparation
        section = page('#details')
        if not section:
            section = page('#productPage')
        domain = tool.get_domain(url)  # result unused; call kept as in the original
        raw = self.get_data(page)

        detail = dict()

        # Brand
        detail['brand'] = re.search(r'brand: "(.*?)",', raw).group(1)

        # Name
        detail['name'] = section('h1[itemprop="name"]').text()

        # Currency
        currency = self.get_currency(raw)
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        detail['price'], detail['listPrice'] = self.get_all_price(raw, section)

        # Image set: prefer #itemGallery, then #galleryBasic (added
        # 2016-09-16); if neither yields images, use the zoom images of the
        # main product view (added 2017-03-03).
        gallery = section('#itemGallery')
        if not gallery:
            gallery = section('#galleryBasic')
        images = [node.attr('src') for node in gallery('img').items()]
        if not images:
            images = [
                node.attr('data-zoom-image')
                for node in section('#product-view .main-image img').items()
            ]
        detail['img'] = images[0]
        detail['imgs'] = images

        # Product ID
        productId = section('.wishlistAdd').attr('data-sku')
        if not productId:
            productId = section('#productPage').attr('data-sku')
        detail['productId'] = productId

        # Color (single default color, keyed by the product ID)
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = productId

        # Sizes
        detail['sizes'] = self.get_sizes(section)

        # Description and returns: first and last items of the info list
        detail['descr'] = section('#itemInfo ul>li:first').text()
        detail['returns'] = section('#itemInfo ul>li:last').text()

        # HTTP status code, sale status and return URL
        detail['status_code'] = status_code
        detail['status'] = self.cfg.STATUS_SALE
        detail['backUrl'] = response.url

        summary = dict(time=time.time(),
                       productId=detail['productId'],
                       name=detail['name'],
                       currency=detail['currency'],
                       price=detail['price'],
                       listPrice=detail['listPrice'],
                       url=url)
        self.logger.info(json.dumps(summary))

        return tool.return_data(successful=True, data=detail)
Пример #14
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled), parses it with PyQuery and loads the page's
        ``application/ld+json`` script as the product data.

        Returns the value of ``tool.return_data``:

        * ``successful=False`` with a ``tool.get_off_shelf`` payload when the
          status is 404 or the ``.topOff`` text contains ``'SOLD OUT'``;
        * ``successful=False`` with a ``tool.get_error`` payload for any
          other non-200 status;
        * ``successful=True`` with the assembled ``detail`` dict otherwise.

        Raises:
            ValueError: if the page lists more than one color option.

        Any other exception raised while fetching or parsing propagates
        unchanged.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(build_payload, message):
            # Shared failure path: log the page title and URL, then wrap the
            # payload produced by ``build_payload`` as an unsuccessful result.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            data = build_payload(code=status_code,
                                 message=message,
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors
        if status_code != 200:
            return reject(tool.get_error,
                          self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        # Preparation
        area = pqhtml('.detalheProdutos')
        domain = tool.get_domain(url)
        pdata = json.loads(
            pqhtml('script[type=\'application/ld+json\']').text())

        # Off the shelf
        if 'SOLD OUT' in area('.topOff').text():
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        detail = dict()

        # Brand
        brand = pdata['brand']['name']
        detail['brand'] = brand

        # Name
        detail['name'] = pdata['name']

        # Currency
        currency = pdata['offers']['priceCurrency']
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price
        listPrice = self.get_listPrice(area, currency)
        detail['price'] = pdata['offers']['price']
        detail['listPrice'] = listPrice

        # More than one color option is not supported; fail loudly.
        if len(area('#listaCores a')) > 1:
            raise ValueError(
                'color number is great than 1 , fix this bug : %s' % url)

        # Color
        detail['color'] = area('#listaCores a:first').text()
        detail['colorId'] = area('#listaCores a:first').attr('data-id')

        # Image set: each href has its first character dropped and the
        # domain prepended.
        imgs = [
            domain + a.attr('href')[1:]
            for a in pqhtml('.lightgalleryG .item a').items()
        ]
        detail['img'] = pdata['image']
        detail['imgs'] = imgs

        # Product ID
        productId = area('a#btAddCarrinho').attr('data-id')
        detail['productId'] = productId

        # Sizes: one entry per size link, or a single default-size entry when
        # the page lists none. The price is the first whitespace-separated
        # token of the link's data-preco attribute.
        sizes = [
            dict(name=ele.text(),
                 inventory=self.cfg.DEFAULT_STOCK_NUMBER,
                 id=ele.attr('data-id'),
                 sku=ele.attr('data-ref'),
                 price=ele.attr('data-preco').split()[0])
            for ele in area('div#listaTamanhos a').items()
        ]
        if not sizes:
            sizes = [
                dict(name=self.cfg.DEFAULT_ONE_SIZE,
                     inventory=self.cfg.DEFAULT_STOCK_NUMBER,
                     id=productId)
            ]
        detail['sizes'] = sizes

        # Description
        detail['descr'] = pdata['description']

        # Details
        detail['detail'] = area('.descMarca').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #15
0
    def detail(self, url):
        """Fetch a product page and build its detail record.

        Requests ``url`` through ``self.session`` (certificate verification
        disabled) and parses it with PyQuery.

        Returns the value of ``tool.return_data``:

        * ``successful=False`` with a ``tool.get_off_shelf`` payload when the
          response status is 404;
        * ``successful=False`` with a ``tool.get_error`` payload for any
          other non-200 status;
        * ``successful=True`` with the assembled ``detail`` dict otherwise.

        Any exception raised while fetching or parsing propagates unchanged
        (including ``IndexError`` when no thumbnail images are found).
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(build_payload, message):
            # Shared failure path: log the page title and URL, then wrap the
            # payload produced by ``build_payload`` as an unsuccessful result.
            log_info = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(log_info)

            data = build_payload(code=status_code,
                                 message=message,
                                 backUrl=resp.url,
                                 html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Off the shelf
        if status_code == 404:
            return reject(tool.get_off_shelf, self.cfg.SOLD_OUT)

        # Other errors
        if status_code != 200:
            return reject(tool.get_error,
                          self.cfg.GET_ERR.get('SCERR', 'ERROR'))

        # Preparation
        area = pqhtml('#productview #main')

        detail = dict()

        # Brand
        brand = self.get_brand(pqhtml)
        detail['brand'] = brand

        # Name
        detail['name'] = area('#name').text()

        # Currency
        currency = pqhtml('div[id="doc"]').attr('currency')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Price and size information come from a single helper call.
        price, listPrice, sizes = self.get_info(area)

        # Price
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Product ID
        productId = area('input#productid').attr('value')
        detail['productId'] = productId

        # Color (single default color, keyed by the product ID)
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = productId

        # Image set
        imgs = [
            img.attr('data-hires')
            for img in area('#thumbs-anim img').items()
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = sizes

        # Description and details share the same section text, so query once.
        section_text = area('.product-details').text()
        detail['descr'] = section_text
        detail['detail'] = section_text

        # HTTP status code
        detail['status_code'] = status_code

        # Status
        detail['status'] = self.cfg.STATUS_SALE

        # Return URL
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #16
0
    def detail(self,url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response, or a page without an orderable product form, is
        reported through ``tool.get_off_shelf``; any other non-200 response is
        reported through ``tool.get_error``.  Every outcome is wrapped with
        ``tool.return_data``.

        When the page carries a ``<meta http-equiv="refresh">`` tag, the
        method sleeps for half of the announced delay, sets the
        ``INSTART_SESSION_ID`` cookie and requests the page a second time
        before parsing it.

        Any exception is logged together with the parsed document and then
        re-raised unchanged.
        """
        # Bound up front so the exception handler can always log it.
        pqhtml = ''

        def reject(sold_out):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(dict(time=time.time(),title=pqhtml('title').text(),url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,message=self.cfg.SOLD_OUT,backUrl=resp.url, html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code, message=self.cfg.GET_ERR.get('SCERR','ERROR'), backUrl=resp.url, html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        try:
            resp = self.session.get(url, verify=False)
            status_code = resp.status_code
            pqhtml = PyQuery(resp.text or 'nothing')

            # Page is gone.
            if status_code == 404:
                return reject(True)

            # Any other unexpected status.
            if status_code != 200:
                return reject(False)

            # Preparation.
            form_selector = 'form[name="productPage"]'
            area = pqhtml(form_selector)
            tool.get_domain(url)  # result is not used by this method

            refresh_meta = pqhtml('meta[http-equiv="refresh"]')
            if len(refresh_meta) > 0:
                # The content attribute starts with the delay, before ';'.
                delay = refresh_meta.attr('content').strip().split(';')[0]
                pause = int(delay)/2
                time.sleep(pause)

                # Cookie value: millisecond timestamp of the moment before the sleep.
                stamp = int((time.time()-pause)*1000)
                self.session.cookies.set('INSTART_SESSION_ID',str(stamp))

                resp = self.session.get(url, verify=False)
                pqhtml = PyQuery(resp.text)
                area = pqhtml(form_selector)

            # No product form, or the form is flagged as not orderable.
            if not area or len(area('.cannotorder')):
                return reject(True)

            productId = area('input[name$="productId"][value!=""]').attr('value')
            pdata = self.get_pdata(area,productId)

            detail = dict()

            # Product id
            detail['productId'] = productId

            # Brand
            detail['brand'] = area('input.cmDesignerName').attr('value')

            # Name
            detail['name'] = area('h1.product-name:first').text()

            # Currency
            currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
            detail['currency'] = currency
            detail['currencySymbol'] = tool.get_unit(currency)

            # Prices
            detail['price'], detail['listPrice'] = self.get_all_price(area)

            # Description
            detail['descr'] = area('div[itemprop="description"]').text()

            # Details
            detail['detail'] = area('.product-details-info').text()

            # Images
            detail['img'], detail['imgs'] = self.get_imgs(area)

            # Sizes; a dict result also supplies the keys and colour maps.
            sizes = self.get_sizes(pdata)
            detail['sizes'] = sizes
            if isinstance(sizes,dict):
                detail['keys'] = sizes.keys()
                detail['color'] = dict((name, name) for name in sizes)
                detail['colorId'] = dict((name, name) for name in sizes)

            # HTTP status code
            detail['status_code'] = status_code

            # Sale status
            detail['status'] = self.cfg.STATUS_SALE

            # Final URL of the response
            detail['backUrl'] = resp.url

            # IP and port of the responding peer, when recorded on the raw response.
            peer = resp.raw._original_response.peer
            if peer:
                detail['ip_port'] = ':'.join(str(part) for part in peer)

            summary = json.dumps(dict(time=time.time(),
                                      productId=detail['productId'],
                                      name=detail['name'],
                                      currency=detail['currency'],
                                      price=detail['price'],
                                      listPrice=detail['listPrice'],
                                      url=url))
            self.logger.info(summary)

            return tool.return_data(successful=True, data=detail)

        except Exception:
            self.logger.exception('html:{0}'.format(pqhtml))
            raise
Пример #17
0
    def detail(self,url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response, or a page whose availability text is not exactly
        ``Available``, is reported through ``tool.get_off_shelf``; any other
        non-200 response is reported through ``tool.get_error``.  Every
        outcome is wrapped with ``tool.return_data``.  Exceptions propagate
        to the caller unchanged.
        """
        resp = self.session.get(url, verify=False)
        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(sold_out):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(dict(time=time.time(),title=pqhtml('title').text(),url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,message=self.cfg.SOLD_OUT,backUrl=resp.url, html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code, message=self.cfg.GET_ERR.get('SCERR','ERROR'), backUrl=resp.url, html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Page is gone.
        if status_code == 404:
            return reject(True)

        # Any other unexpected status.
        if status_code != 200:
            return reject(False)

        # Preparation.
        area = pqhtml('.page-content')
        domain = tool.get_domain(url)

        # Not available for sale.
        availability = area('div[itemprop="availability"]').text().strip()
        if availability != 'Available':
            return reject(True)

        detail = dict()

        # Brand (fixed for this site)
        detail['brand'] = 'Kit and Ace'

        # Name
        detail['name'] = area('h1[itemprop="name"]').text()

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices
        detail['price'], detail['listPrice'] = self.get_all_price(area)

        # Colours
        color = self.get_color(area)
        detail['color'] = color
        detail['colorId'] = dict((cid, cid) for cid in color.keys())

        # Images: a list yields its first entry, a mapping yields the first
        # entry of each value.
        imgs = self.get_imgs(area,domain)
        if isinstance(imgs,list):
            cover = imgs[0]
        else:
            cover = dict((cid, shots[0]) for cid, shots in imgs.items())
        detail['img'] = cover
        detail['imgs'] = imgs

        # Keys
        detail['keys'] = color.keys()

        # Product id
        detail['productId'] = area('.js-pdp-product-code').attr('data-product-id')

        # Sizes
        detail['sizes'] = self.get_sizes(area)

        # Description
        detail['descr'] = area('.pdp-desc__description').text()

        # Fabric
        detail['fabric'] = area('.pdp-info-components').text()

        # Details
        detail['detail'] = area('.productDetailsPageSection1').text()

        # Returns
        detail['returns'] = area('.productInfo>.infowrap>dl>dd:first').text()

        # Model information
        detail['model'] = self.get_model(area,color.keys())

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Return link
        detail['backUrl'] = url

        summary = json.dumps(dict(time=time.time(),
                                  productId=detail['productId'],
                                  name=detail['name'],
                                  currency=detail['currency'],
                                  price=detail['price'],
                                  listPrice=detail['listPrice'],
                                  url=url))
        self.logger.info(summary)

        return tool.return_data(successful=True, data=detail)
Пример #18
0
    def detail(self, url):
        """Fetch a product page plus its AJAX product data and build the detail record.

        The page at ``url`` is requested first and its status code decides
        whether processing continues: 404 yields an off-shelf result and any
        other non-200 status yields an error result.  Only for a 200 page is
        the numeric id taken from ``url`` and
        ``<domain>/ajaxprodDetail.aspx?ProdId=<id>`` requested; the JSON body
        of that second response supplies the brand and the per-product data.
        An empty ``Prods`` entry is treated as off-shelf.

        Failure results report the product page's final URL as ``backUrl``
        (not the AJAX endpoint).  Per-product values in the returned record
        are dicts keyed by each product's ``ProdID``.

        Raises:
            AttributeError: if ``url`` contains no ``/<digits>/`` segment.
        """
        page_resp = self.session.get(url, verify=False)
        status_code = page_resp.status_code
        pqhtml = PyQuery(page_resp.text or 'nothing')

        def reject(sold_out):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=page_resp.url,
                                             html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=page_resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        def first_number(markup):
            # Strip tags and thousands separators, return the first numeric run.
            text = PyQuery(markup).text().replace(',', '')
            return re.search(r'(\d[\d\.]*)', text, re.DOTALL).groups()[0]

        # Check the page status before spending a second request on it.
        if status_code == 404:
            return reject(True)

        if status_code != 200:
            return reject(False)

        # Preparation: the greedy pattern picks the last /<digits>/ segment.
        domain = tool.get_domain(url)
        page_id = re.search(r'.*\/(\d+)\/.*', url, re.DOTALL).groups()[0]
        link = domain + ('/ajaxprodDetail.aspx?ProdId=%s' % page_id)

        ajax_resp = self.session.get(link, verify=False)
        pdata = json.loads(ajax_resp.text)

        # No products listed: off shelf.
        if not pdata['Prods']:
            return reject(True)

        detail = dict()

        # Brand
        brand = pdata['Brand'].get(
            'BrandLangName', None) or pdata['Brand']['DisplayBrandName']
        detail['brand'] = brand

        # Currency: three-letter code inside the changeCurrency('XXX') handler.
        currency = re.search(
            r'\(\'(\w{3})\'\)',
            pqhtml('a[onclick^="changeCurrency"]').attr('onclick'),
            re.DOTALL).groups()[0]
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Per-product containers, filled in the loop below.
        detail['keys'] = []
        detail['name'] = dict()
        detail['sizes'] = dict()
        detail['price'] = dict()
        detail['img'] = dict()
        detail['imgs'] = dict()
        detail['descr'] = dict()
        detail['listPrice'] = dict()
        detail['color'] = dict()
        detail['colorId'] = dict()
        detail['productId'] = dict()

        for product in pdata['Prods']:

            prod_id = product['ProdID']

            detail['keys'].append(prod_id)

            detail['productId'][prod_id] = prod_id
            detail['color'][prod_id] = self.cfg.DEFAULT_ONE_COLOR
            detail['colorId'][prod_id] = prod_id

            detail['name'][prod_id] = brand + ' ' + product['ProdLangName']

            detail['sizes'][prod_id] = [
                dict(name=product['OptionValue'],
                     inventory=self.cfg.DEFAULT_STOCK_NUMBER,
                     sku=product['OptionValue'])
            ]

            detail['price'][prod_id] = first_number(product['ShopPrice'])

            # Fall back to the shop price when there is no "was" price.
            detail['listPrice'][prod_id] = first_number(
                product['WasPrice'] or product['ShopPrice'])

            # Prefer the largest available rendition of each image.
            images = [
                img['img700Src'] or img['img350Src'] or img['imgSrc']
                for img in product['ProductImages']
            ]
            detail['img'][prod_id] = images[0]
            detail['imgs'][prod_id] = images

            detail['descr'][prod_id] = ' '.join(
                [part.get('text') for part in product['Description']])

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Return link
        detail['backUrl'] = url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #19
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response, or a stock-status text containing the out-of-stock
        marker, is reported through ``tool.get_off_shelf``; any other non-200
        response is reported through ``tool.get_error``.  Every outcome is
        wrapped with ``tool.return_data``.  The record describes a single
        colour and a single size whose inventory comes from the quantity
        drop-down when the page says the item is in stock, and is 0 otherwise.
        Exceptions propagate to the caller unchanged.
        """
        resp = self.session.get(url, verify=False)
        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(sold_out):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=resp.url,
                                             html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Page is gone.
        if status_code == 404:
            return reject(True)

        # Any other unexpected status.
        if status_code != 200:
            return reject(False)

        # Preparation.
        area = pqhtml('.product-detail-container')
        tool.get_domain(url)  # result is not used by this method

        # Out of stock.
        if u'缺货' in area('#stock-status').text():
            return reject(True)

        detail = dict()

        # Brand
        detail['brand'] = (area('#brand:first span').text()
                           or area('#brand a').text())

        # Name
        detail['name'] = area('#name').text()

        # Currency
        currency = area('#price-currency').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices
        detail['price'], detail['listPrice'] = self.get_all_price(area)

        # Colour (single default colour)
        detail['color'] = self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Images: thumbnails first, then the zoom images, then the summary link.
        gallery = [
            thumb.attr('data-large-img') for thumb in area(
                '.image-container  .thumbnail-container img').items()
        ]
        if not gallery:
            gallery = [
                zoomed.attr('src')
                for zoomed in area('#iherb-product-zoom img').items()
            ]
        if not gallery:
            gallery = [
                area('#product-image .product-summary-image a').attr('href')
            ]
        detail['img'] = gallery[0]
        detail['imgs'] = gallery

        # Product id
        productId = area('input[name="pid"]').attr('value')
        detail['productId'] = productId

        # Sizes: one default size; inventory is the last quantity option
        # when the page reports stock.
        stock_txt = area('#stock-status').text()
        if 'In Stock' in stock_txt or u'有库存' in stock_txt:
            inv = area('#ddlQty option:last').attr('value')
        else:
            inv = 0
        only_size = dict(name=self.cfg.DEFAULT_ONE_SIZE,
                         inventory=inv,
                         id=productId,
                         sku=productId)
        detail['sizes'] = [only_size]

        # Description
        detail['descr'] = area('#product-specs-list li').text()

        # Details
        detail['detail'] = pqhtml('div[itemprop="description"]').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Return link
        detail['backUrl'] = url

        summary = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))
        self.logger.info(summary)

        return tool.return_data(successful=True, data=detail)
Пример #20
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        The domain of ``url`` is stored on ``self.domain`` and the first
        response is passed through ``self.end_verify`` before it is parsed.
        A 404 status, a page title containing ``404 not found``, or a buy box
        containing ``Sold out`` is reported through ``tool.get_off_shelf``.
        Any other non-200 status, or a page marked ``div.notonline``, is
        reported through ``tool.get_error``.  Every outcome is wrapped with
        ``tool.return_data``.  Exceptions propagate to the caller unchanged.
        """
        # Bind the domain.
        self.domain = tool.get_domain(url)

        first_resp = self.session.get(url, verify=False)

        # Site-specific verification step.
        resp = self.end_verify(first_resp, url)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(sold_out, err_key='SCERR'):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=resp.url,
                                             html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             err_key, 'ERROR'),
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Not found, by status or by page title.
        if status_code == 404:
            return reject(True)
        if '404 not found' in pqhtml('head title').text().lower():
            return reject(True)

        # Any other unexpected status.
        if status_code != 200:
            return reject(False)

        Jtxt = pqhtml('script').text()
        area = pqhtml('div.product-essential')

        # Sold out.
        if 'Sold out' in area('div.product-buy-box').text():
            return reject(True)

        # Sold offline only.
        if len(area('div.notonline')) > 0:
            return reject(False, 'NTONLINE')

        # Product configuration parsed from the script text.
        pcfg = self.get_pcfg(Jtxt)

        detail = dict()

        # Currency
        currency = pqhtml('meta[property="product:price:currency"]').attr(
            'content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices
        detail['price'], detail['listPrice'] = self.get_all_price(pcfg)

        # Brand
        detail['brand'] = (pqhtml('meta[name="WT.z_pbrand"]').attr('content')
                           or area('.product-description span h1').text())

        # Name
        detail['name'] = (area('h1[itemprop="name"]').text()
                          or area('.product-description h1').text())

        # Images
        gallery = self.get_imgs(area)
        detail['imgs'] = gallery
        detail['img'] = gallery[0]

        # Product id
        productId = pcfg['productId']
        detail['productId'] = productId

        # Colour
        detail['color'] = (area('div.product-description h3:first').text()
                           or self.cfg.DEFAULT_ONE_COLOR)
        detail['colorId'] = productId

        # Returns
        detail['returns'] = area('#prod-info-tab4').text()

        # Sizes
        detail['sizes'] = self.get_sizes(pcfg)

        # Description: description text, delivery tab and fit notes, concatenated.
        descr_parts = [
            area('div.product-description-text').text(),
            area('#prod-info-tab2').text(),
            area('#fit-description').text(),
        ]
        detail['descr'] = descr_parts[0] + descr_parts[1] + descr_parts[2]

        # Delivery
        detail['delivery'] = area('#prod-info-tab2').text()

        # Size and fit notes
        detail['sizeFit'] = area('#fit-description').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        summary = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))
        self.logger.info(summary)

        return tool.return_data(successful=True, data=detail)
Пример #21
0
    def detail(self, url, retries=3):
        """Fetch the product page at ``url`` and assemble its detail record.

        The first response is passed through ``self.resp_verify``; if its body
        contains a ``window.location.reload(true);`` script the page is
        requested once more.  An empty body triggers a fresh attempt of the
        whole procedure, at most ``retries`` times; once the attempts are
        used up, processing continues with the placeholder document.

        A 404 status, or an availability text containing ``Out of stock``, is
        reported through ``tool.get_off_shelf``; any other non-200 status is
        reported through ``tool.get_error``.  Every outcome is wrapped with
        ``tool.return_data``.  Exceptions propagate to the caller unchanged.

        Args:
            url: product page URL.
            retries: how many more times to start over when the server
                answers with an empty body.
        """
        resp = self.session.get(url, verify=False)  # first load

        # Validate the response (anti-crawler handling).
        resp = self.resp_verify(resp)

        if 'window.location.reload(true);' in resp.text:
            resp = self.session.get(url, verify=False)  # second load

        # The server sometimes answers with no content at all: start over,
        # but only a bounded number of times.
        if not resp.text and retries > 0:
            return self.detail(url, retries - 1)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(sold_out):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=resp.url,
                                             html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Page is gone.
        if status_code == 404:
            return reject(True)

        # Any other unexpected status.
        if status_code != 200:
            return reject(False)

        # Out of stock.
        if 'Out of stock' in pqhtml('.product-availability').text():
            return reject(True)

        # Preparation.
        area = pqhtml('div#product-view')
        pdata = self.get_pdata(area)

        detail = dict()

        # Brand: the part of the heading before the first '-'.
        heading = area('.panel-a h1:first').text()
        detail['brand'] = heading.split('-')[0].strip()

        # Name
        detail['name'] = heading

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Product id
        productId = pdata['productId']
        detail['productId'] = productId

        # Prices, with thousands separators removed.
        detail['price'] = pdata['basePrice'].replace(',', '')
        detail['listPrice'] = pdata['oldPrice'].replace(',', '')

        # Colour
        detail['color'] = area('button#product-addtocart-button').attr(
            'data-variant') or self.cfg.DEFAULT_ONE_COLOR
        detail['colorId'] = productId

        # Images
        imgs = [
            img.attr('data-src')
            for img in area('div#mobile-carousel-images a>img').items()
        ]
        detail['img'] = imgs[0]
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = self.get_sizes(area, pdata)

        # Description
        detail['descr'] = area('div.tog-desc').text() + area.parent()(
            '.description-section:first').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        log_info = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))

        self.logger.info(log_info)

        return tool.return_data(successful=True, data=detail)
Пример #22
0
    def detail(self, url):
        """Fetch the product page at ``url`` and assemble its detail record.

        A 404 response, or an availability text that lacks ``In stock``, is
        reported through ``tool.get_off_shelf``; any other non-200 response is
        reported through ``tool.get_error``.  Every outcome is wrapped with
        ``tool.return_data``.  The product id is also stored on
        ``self.productId`` before ``self.get_info`` is called.  Exceptions
        propagate to the caller unchanged.
        """
        resp = self.session.get(url, verify=False)
        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def reject(sold_out):
            # Log the page title, then build the off-shelf or error payload.
            entry = json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url))
            self.logger.info(entry)
            if sold_out:
                payload = tool.get_off_shelf(code=status_code,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=resp.url,
                                             html=pqhtml.outerHtml())
            else:
                payload = tool.get_error(code=status_code,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=resp.url,
                                         html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=payload)

        # Page is gone.
        if status_code == 404:
            return reject(True)

        # Any other unexpected status.
        if status_code != 200:
            return reject(False)

        # Preparation.
        area = pqhtml('.product-area')
        tool.get_domain(url)  # result is not used by this method

        # Not in stock.
        availability = area('p.availability').text()
        if 'In stock' not in availability:
            return reject(True)

        detail = dict()

        # Brand
        detail['brand'] = area('meta[itemprop="brand"]').attr('content')

        # Name
        detail['name'] = area('h1[itemprop="name"]').text()

        # Currency
        currency = area('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Product id (also kept on the instance)
        productId = area('input[name="prodId"]').attr('value')
        detail['productId'] = productId
        self.productId = productId

        # Colour, prices, images and sizes come from one helper call.
        color, price, listPrice, img, imgs, sizes = self.get_info(area)

        # Keys exist only when colours come back as a mapping.
        if isinstance(color, dict):
            detail['keys'] = color.keys()

        # Prices
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Colour: a plain string maps to the product id, a mapping to itself.
        detail['color'] = color
        if isinstance(color, basestring):
            color_ids = productId
        else:
            color_ids = dict((cid, cid) for cid in color.keys())
        detail['colorId'] = color_ids

        # Images
        detail['img'] = img
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = sizes

        # Description
        detail['descr'] = area('.js-prodInfo-description').text()

        # Details
        detail['detail'] = area('.js-prodInfo-details').text()

        # Returns
        # NOTE(review): reads the same selector as 'delivery' below - confirm
        # that this is intended.
        detail['returns'] = area('.js-prodInfo-delivery').text()

        # Delivery
        detail['delivery'] = area('.js-prodInfo-delivery').text()

        # HTTP status code
        detail['status_code'] = status_code

        # Sale status
        detail['status'] = self.cfg.STATUS_SALE

        # Final URL of the response
        detail['backUrl'] = resp.url

        summary = json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url))
        self.logger.info(summary)

        return tool.return_data(successful=True, data=detail)
Пример #23
0
    def detail(self, url):
        """Fetch a product page and return its scraped details.

        Downloads ``url`` with the instance session, lets
        ``is_ok_status_code`` decide whether the response is usable, and
        otherwise fills a dict from the ``.product-detail-information``
        section of the page.

        :param url: address of the product page.
        :returns: the second value from ``is_ok_status_code`` when that check
            fails, otherwise ``tool.return_data(successful=True, data=...)``.
        :raises IndexError: when the currency selector text is empty or no
            gallery link is found.
        """
        response = self.session.get(url, verify=False)
        http_status = response.status_code
        # Parse a placeholder document when the body is empty.
        page = PyQuery(response.text or 'nothing')

        # Status handling (off-shelf / error) is delegated to the helper.
        ok, failure = self.is_ok_status_code(http_status, page, url, response)
        if not ok:
            return failure

        info_box = page('.product-detail-information')
        # NOTE(review): neither return value is used below; the calls are
        # kept in case the helpers have side effects.
        tool.get_domain(url)
        self.get_pdata(info_box)

        item = {}

        # Product id: the SKU text is reused as sku and product code.
        sku_text = info_box('.product-detail-selection-sku').text()
        item['productId'] = sku_text
        item['productSku'] = sku_text
        item['productCode'] = sku_text

        # Brand is not extracted for this site.
        item['brand'] = None

        # Name
        item['name'] = info_box('.J_title_name').text()

        # Currency: first whitespace-separated token of the selector text.
        currency_code = page('a#select_currency').text().split()[0]
        item['currency'] = currency_code
        item['currencySymbol'] = tool.get_unit(currency_code)

        # Prices
        item['price'], item['listPrice'] = self.get_all_price(info_box)

        # Description and detail are both read from the same tab.
        tab_text = info_box('#product-description-tab').text()
        item['descr'] = tab_text
        item['detail'] = tab_text

        # Returns / exchanges
        item['returns'] = info_box('.product-directions').text()

        # Colour: configured defaults are used.
        item['color'] = self.cfg.DEFAULT_ONE_COLOR
        item['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Image set: hrefs of the preview toolbar links.
        gallery = []
        for link in page('.product-detail-preview .toolbar>li>a').items():
            gallery.append(link.attr('href'))
        item['img'] = gallery[0]
        item['imgs'] = gallery

        # Sizes
        item['sizes'] = self.get_sizes(info_box)

        # HTTP status, sale status and final URL
        item['status_code'] = http_status
        item['status'] = self.cfg.STATUS_SALE
        item['backUrl'] = response.url

        # Remote IP and port, when the raw response carries a peer value.
        peer = response.raw._original_response.peer
        if peer:
            item['ip_port'] = ':'.join(str(part) for part in peer)

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=item['productId'],
                 name=item['name'],
                 currency=item['currency'],
                 price=item['price'],
                 listPrice=item['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=item)
Пример #24
0
    def detail(self, url):
        """Scrape one product page into a detail dict.

        The response is first passed through ``self.resVerify``.  A non-200
        status produces a failure payload (off-shelf for 404, generic error
        otherwise), and so does a page whose availability fragment contains
        no ``.instock`` element.

        :param url: address of the product page.
        :returns: result of ``tool.return_data``; ``successful`` is True only
            when the product data was extracted.
        """
        response = self.session.get(url, verify=False)

        # Original note: these two assignments prepare for the verification
        # step and must stay ahead of it.
        self.domain = tool.get_domain(url)
        self.url = url

        # Verify the response.
        response = self.resVerify(response)

        http_status = response.status_code
        page = PyQuery(response.text or 'nothing')

        def log_title():
            # Log the title and URL of a page that is being rejected.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=page('title').text(),
                     url=url)))

        if http_status != 200:
            log_title()
            if http_status == 404:
                # 404 is reported as off shelf.
                failure = tool.get_off_shelf(code=http_status,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=response.url,
                                             html=page.outerHtml())
            else:
                # Any other non-200 status is a generic error.
                failure = tool.get_error(code=http_status,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        # Preparation
        product = self.get_pdata(page)

        # Off shelf when the availability markup has no ``.instock`` node.
        if len(PyQuery(product['availability'])('.instock')) == 0:
            log_title()
            failure = tool.get_off_shelf(code=http_status,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        item = {}

        # Brand
        brand_name = product['brand']
        item['brand'] = brand_name

        # Name: brand followed by the product title.
        item['name'] = brand_name + ' ' + product['title']

        # Currency
        currency_code = page('meta[itemprop="priceCurrency"]').attr('content')
        item['currency'] = currency_code
        item['currencySymbol'] = tool.get_unit(currency_code)

        # Prices
        item['price'], item['listPrice'] = self.get_all_price(product)

        # Description
        item['descr'] = page('div.product-description').text()

        # Colour: default colour name, SKU as colour id.
        item['color'] = self.cfg.DEFAULT_ONE_COLOR
        item['colorId'] = product['sku']

        # Image set
        item['img'] = product['image']
        item['imgs'] = product['images']

        # Sizes: a single default size with the default stock number.
        item['sizes'] = [
            dict(name=self.cfg.DEFAULT_ONE_SIZE,
                 inventory=self.cfg.DEFAULT_STOCK_NUMBER,
                 sku=product['sku'])
        ]

        # Product id
        item['productId'] = product['sku']

        # Returns / exchanges
        item['returns'] = page('dd#tab-container-guarantee').text()

        # HTTP status and sale status
        item['status_code'] = http_status
        item['status'] = self.cfg.STATUS_SALE

        # Back link: the requested URL, not ``response.url``.
        item['backUrl'] = url

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=item['productId'],
                 name=item['name'],
                 currency=item['currency'],
                 price=item['price'],
                 listPrice=item['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=item)
Пример #25
0
    def detail(self, url):
        """Scrape one product page into a detail dict.

        Product data comes from ``self.get_pdata`` applied to the combined
        text of the page's ``<script>`` elements, plus the ``#container``
        section of the page.

        :param url: address of the product page.
        :returns: result of ``tool.return_data``; a failure payload is
            returned for any non-200 status (off-shelf for 404).
        :raises IndexError: when no alternate-image link is found.
        """
        response = self.session.get(url, verify=False)

        http_status = response.status_code
        page = PyQuery(response.text or 'nothing')
        # Used below as the prefix for the image hrefs.
        site_domain = tool.get_domain(url)

        if http_status != 200:
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=page('title').text(),
                     url=url)))
            if http_status == 404:
                # 404 is reported as off shelf.
                failure = tool.get_off_shelf(code=http_status,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=response.url,
                                             html=page.outerHtml())
            else:
                # Any other non-200 status is a generic error.
                failure = tool.get_error(code=http_status,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        # Preparation
        script_text = page('script').text()
        container = page('#container')
        product = self.get_pdata(script_text)['product']

        item = {}

        # Images: domain-prefixed hrefs of the first alternate-image list.
        gallery = []
        for link in container('form#addToCart ul.alt_imgs:first>li>a').items():
            gallery.append(site_domain + link.attr('href'))
        item['img'] = gallery[0]
        item['imgs'] = gallery

        # Name
        item['name'] = product['name']

        # Brand
        item['brand'] = container('form#addToCart a#sameBrandProduct').text()

        # Prices
        item['price'] = product['unit_sale_price']
        item['listPrice'] = product['unit_price']

        # Currency
        currency_code = product['currency']
        item['currency'] = currency_code
        item['currencySymbol'] = tool.get_unit(currency_code)

        # Product id
        product_id = product['id']
        item['productId'] = product_id

        # Colour: default colour name, product id as colour id.
        item['color'] = self.cfg.DEFAULT_ONE_COLOR
        item['colorId'] = product_id

        # Sizes: a single default size carrying the product stock.
        item['sizes'] = [
            dict(name=self.cfg.DEFAULT_ONE_SIZE,
                 inventory=product['stock'],
                 sku=product_id)
        ]

        # Description: short description plus the first info tab.
        short_text = container('.prod_desc').text()
        tab_text = container('div#info_tabs>div.wrap>div#tab1_info').text()
        item['descr'] = short_text + ' ' + tab_text

        # Detail
        item['detail'] = container('#tab1_info').text()

        # Brand description
        item['brandDescr'] = container('#tab2_info').text()

        # Warranty note
        item['note'] = container('#tab5_info').text()

        # Delivery
        item['delivery'] = container('#shippingData').text()

        # HTTP status and sale status
        item['status_code'] = http_status
        item['status'] = self.cfg.STATUS_SALE

        # Back link: the requested URL, not ``response.url``.
        item['backUrl'] = url

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=item['productId'],
                 name=item['name'],
                 currency=item['currency'],
                 price=item['price'],
                 listPrice=item['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=item)
Пример #26
0
    def detail(self, url):
        """Scrape one product page into a detail dict.

        A failure payload is returned for any non-200 status (off-shelf for
        404) and when the text of the page's ``<font>`` elements contains
        ``SOLD OUT``.

        :param url: address of the product page.
        :returns: result of ``tool.return_data``.
        """
        response = self.session.get(url, verify=False)

        http_status = response.status_code
        page = PyQuery(response.text or 'nothing')

        def log_title():
            # Log the title and URL of a page that is being rejected.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=page('title').text(),
                     url=url)))

        if http_status != 200:
            log_title()
            if http_status == 404:
                # 404 is reported as off shelf.
                failure = tool.get_off_shelf(code=http_status,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=response.url,
                                             html=page.outerHtml())
            else:
                # Any other non-200 status is a generic error.
                failure = tool.get_error(code=http_status,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        # Preparation (container selector added 2016-12-15).
        box = page('form#productForm #ProductDetailPage #ProductDetails')
        product = self.get_pdata(page)
        # NOTE(review): return value unused; call kept in case the helper
        # has side effects.
        tool.get_domain(url)

        # Off shelf
        if 'SOLD OUT' in page('font').text():
            log_title()
            failure = tool.get_off_shelf(code=http_status,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        item = {}

        # Brand
        item['brand'] = page('input[id="Brand"]').attr('value')

        # Name
        item['name'] = box('#DetailsHeading').text()

        # Prices and currency come from a single helper call.
        currency_code, sale_price, list_price = self.get_all_price(box)
        item['price'] = sale_price
        item['listPrice'] = list_price
        item['currency'] = currency_code
        item['currencySymbol'] = tool.get_unit(currency_code)

        # Product id: the ChildID input, which is also used as colour id
        # (original note: the product id proper is input[name="MasterID"]).
        child_id = box('input[name="ChildID"]').attr('value')
        item['productId'] = child_id

        # Image set
        gallery = self.get_imgs(child_id, product)
        item['img'] = gallery[0]
        item['imgs'] = gallery

        # Colour
        # NOTE(review): the get_color result is not used; the default colour
        # name is stored instead. Call kept in case it has side effects.
        self.get_color(child_id, product)
        item['color'] = self.cfg.DEFAULT_ONE_COLOR
        item['colorId'] = child_id

        # Sizes
        item['sizes'] = self.get_sizes(child_id, product)

        # Description
        item['descr'] = box('#Description').text()

        # HTTP status, sale status and final URL
        item['status_code'] = http_status
        item['status'] = self.cfg.STATUS_SALE
        item['backUrl'] = response.url

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=item['productId'],
                 name=item['name'],
                 currency=item['currency'],
                 price=item['price'],
                 listPrice=item['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=item)
Пример #27
0
    def detail(self, url):
        """Scrape one product page into a detail dict.

        Product data comes from ``self.get_pdata`` applied to the page
        ``<head>``, plus the ``.product-single-section-main`` section.

        :param url: address of the product page.
        :returns: result of ``tool.return_data``; a failure payload is
            returned for any non-200 status (off-shelf for 404).
        :raises IndexError: when no slider image is found.
        """
        response = self.session.get(url, verify=False)

        http_status = response.status_code
        page = PyQuery(response.text or 'nothing')

        if http_status != 200:
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=page('title').text(),
                     url=url)))
            if http_status == 404:
                # 404 is reported as off shelf.
                failure = tool.get_off_shelf(code=http_status,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=response.url,
                                             html=page.outerHtml())
            else:
                # Any other non-200 status is a generic error.
                failure = tool.get_error(code=http_status,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        # Preparation
        main_section = page('.product-single-section-main')
        # NOTE(review): return value unused; call kept in case the helper
        # has side effects.
        tool.get_domain(url)
        pdata = self.get_pdata(page('head'))

        item = {}

        # Brand
        item['brand'] = pdata['product']['vendor']

        # Name
        item['name'] = main_section('h1[itemprop="name"]').text()

        # Currency
        currency_code = page('meta[itemprop="priceCurrency"]').attr('content')
        item['currency'] = currency_code
        item['currencySymbol'] = tool.get_unit(currency_code)

        # Prices
        item['price'], item['listPrice'] = self.get_all_price(main_section)

        # Colour: default colour name, product id as colour id.
        item['color'] = self.cfg.DEFAULT_ONE_COLOR
        item['colorId'] = pdata['product']['id']

        # Image set: slider image sources, prefixed with the scheme.
        gallery = []
        for picture in main_section('.super-slider-main img').items():
            gallery.append('http:' + picture.attr('src'))
        item['img'] = gallery[0]
        item['imgs'] = gallery

        # Product id
        item['productId'] = pdata['product']['id']

        # Sizes
        item['sizes'] = self.get_sizes(pdata, main_section)

        # Description
        item['descr'] = main_section(
            '.product-single-details-dropdown').text()

        # HTTP status, sale status and final URL
        item['status_code'] = http_status
        item['status'] = self.cfg.STATUS_SALE
        item['backUrl'] = response.url

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=item['productId'],
                 name=item['name'],
                 currency=item['currency'],
                 price=item['price'],
                 listPrice=item['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=item)
Пример #28
0
    def detail(self, url):
        """Scrape one product page into a detail dict.

        A failure payload is returned for any non-200 status (off-shelf for
        404) and when the product area does not advertise ``InStock`` or its
        availability text contains "sold out".  Products with a
        ``.variation-dropdowns`` element get their images, colours and sizes
        from ``get_color_img_size``; all others are treated as a single
        default colour and size.

        No page HTML is written to stdout, so a page without a product area
        reaches the off-shelf check.

        :param url: address of the product page.
        :returns: result of ``tool.return_data``.
        """
        resp = self.session.get(url, verify=False)

        status_code = resp.status_code
        pqhtml = PyQuery(resp.text or 'nothing')

        def log_title():
            # Log the title and URL of a page that is being rejected.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=pqhtml('title').text(),
                     url=url)))

        if status_code != 200:
            log_title()
            if status_code == 404:
                # 404 is reported as off shelf.
                data = tool.get_off_shelf(code=status_code,
                                          message=self.cfg.SOLD_OUT,
                                          backUrl=resp.url,
                                          html=pqhtml.outerHtml())
            else:
                # Any other non-200 status is a generic error.
                data = tool.get_error(code=status_code,
                                      message=self.cfg.GET_ERR.get(
                                          'SCERR', 'ERROR'),
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        # Preparation
        area = pqhtml('.body-wrap .primary-wrap .product-area')
        # NOTE(review): return value unused; call kept in case the helper
        # has side effects.
        tool.get_domain(url)
        siteObj = self.get_siteObj(pqhtml)

        # Off shelf: a missing availability meta (attr gives None) counts too.
        availability = area('meta[itemprop="availability"]').attr('content')
        if (availability != 'InStock'
                or 'sold out' in area('.availability').text().lower()):
            log_title()
            data = tool.get_off_shelf(code=status_code,
                                      message=self.cfg.SOLD_OUT,
                                      backUrl=resp.url,
                                      html=pqhtml.outerHtml())
            return tool.return_data(successful=False, data=data)

        detail = {}

        # Product id: hidden "buy" input, falling back to the site object.
        productId = area(
            'input[class="buy"][name="buy"][type="hidden"]'
        ).attr('value') or self.get_product_id(siteObj)
        detail['productId'] = productId

        # Brand
        detail['brand'] = self.get_brand(siteObj)

        # Name
        detail['name'] = area('.product-title').text()

        # Currency
        currency = pqhtml('meta[itemprop="priceCurrency"]').attr('content')
        detail['currency'] = currency
        detail['currencySymbol'] = tool.get_unit(currency)

        # Prices
        price, listPrice = self.get_all_price(area)
        detail['price'] = price
        detail['listPrice'] = listPrice

        # Colour, image and size information
        if area('.variation-dropdowns'):
            img, imgs, color, sizes = self.get_color_img_size(area, productId)
            detail['keys'] = color.keys()
        else:
            img = area('.main-product-image a').attr('href')
            # Thumbnail links, switched from the 300px to the 600px rendition.
            imgs = [
                li_a.attr('href').strip().replace('/300/300/', '/600/600/')
                for li_a in area('ul.product-thumbnails li a').items()
            ]
            color = self.cfg.DEFAULT_ONE_COLOR
            sizes = [
                dict(name=self.cfg.DEFAULT_ONE_SIZE,
                     id=productId,
                     sku=productId,
                     inventory=self.cfg.DEFAULT_STOCK_NUMBER)
            ]

        # Colour: a dict of variants maps each key to itself as colour id.
        detail['color'] = color
        if isinstance(color, dict):
            detail['colorId'] = {cid: cid for cid in color.keys()}
        else:
            detail['colorId'] = productId

        # Image set
        detail['img'] = img
        detail['imgs'] = imgs

        # Sizes
        detail['sizes'] = sizes

        # Description: description block followed by the "more details" text.
        detail['descr'] = area('div[itemprop="description"]').text() + area(
            'div.product-more-details').text()

        # Detail
        detail['detail'] = area('div.product-more-details').text()

        # Returns / exchanges
        detail['returns'] = area('.product-delivery-returns').text()

        # HTTP status, sale status and final URL
        detail['status_code'] = status_code
        detail['status'] = self.cfg.STATUS_SALE
        detail['backUrl'] = resp.url

        # Remote IP and port, when the raw response carries a peer value.
        peer = resp.raw._original_response.peer
        if peer:
            detail['ip_port'] = ':'.join(map(str, peer))

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=detail['productId'],
                 name=detail['name'],
                 currency=detail['currency'],
                 price=detail['price'],
                 listPrice=detail['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=detail)
Пример #29
0
    def detail(self, url):
        """Scrape one product page into a detail dict.

        A failure payload is returned for any non-200 status (off-shelf for
        404) and when the ``.product-detail`` area is missing or its
        ``.out-of-stock`` text contains "out of stock".  Most fields are
        copied from the ``product`` entry of ``self.get_pdata``.

        :param url: address of the product page.
        :returns: result of ``tool.return_data``.
        """
        response = self.session.get(url, verify=False)

        http_status = response.status_code
        page = PyQuery(response.text or 'nothing')

        def log_title():
            # Log the title and URL of a page that is being rejected.
            self.logger.info(json.dumps(
                dict(time=time.time(),
                     title=page('title').text(),
                     url=url)))

        if http_status != 200:
            log_title()
            if http_status == 404:
                # 404 is reported as off shelf.
                failure = tool.get_off_shelf(code=http_status,
                                             message=self.cfg.SOLD_OUT,
                                             backUrl=response.url,
                                             html=page.outerHtml())
            else:
                # Any other non-200 status is a generic error.
                failure = tool.get_error(code=http_status,
                                         message=self.cfg.GET_ERR.get(
                                             'SCERR', 'ERROR'),
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        # Preparation
        product_area = page('.product-detail')
        tabs = page('#product-detail-tabs')
        image_box = page('div.images')
        # NOTE(review): return value unused; call kept in case the helper
        # has side effects.
        tool.get_domain(url)
        pdata = self.get_pdata(page)

        # Off shelf
        if (not product_area
                or 'out of stock' in product_area('.out-of-stock').text()):
            log_title()
            failure = tool.get_off_shelf(code=http_status,
                                         message=self.cfg.SOLD_OUT,
                                         backUrl=response.url,
                                         html=page.outerHtml())
            return tool.return_data(successful=False, data=failure)

        video_base = 'http://image1.superdry.com/static/images/products/'
        product = pdata['product']

        item = {}

        # Total stock of this product.
        item['stock'] = product['stock']
        item['video'] = video_base + product['video']
        item['gender'] = product['gender']
        item['season'] = product['season']
        item['category'] = product['category']
        item['productSku'] = product['sku_code']
        item['size_guide'] = product['size_guide']
        item['subcategory'] = product['subcategory']
        item['productCode'] = product['sku_code']

        # Product id
        item['productId'] = product['id']

        # Brand is fixed for this site.
        item['brand'] = 'SUPERDRY'

        # Name
        item['name'] = product['name']

        # Currency
        currency_code = product['currency']
        item['currency'] = currency_code
        item['currencySymbol'] = tool.get_unit(currency_code)

        # Prices
        item['price'] = product['unit_sale_price']
        item['listPrice'] = product['unit_price']

        # Description
        item['descr'] = product['description']

        # Detail: full text of the tab container.
        item['detail'] = tabs.text()

        # Returns / exchanges
        # NOTE(review): 'tab-page' is an element selector (no '.' or '#');
        # confirm against the page markup that this is intended.
        item['returns'] = tabs('tab-page:last').text()

        # Colour: the same value is used as name and id.
        item['color'] = product['color']
        item['colorId'] = product['color']

        # Image set: thumbnail sources rewritten to the zoom rendition.
        gallery = [
            thumb.attr('src').replace('/productthumbs/', '/zoom/')
            for thumb in image_box('.scroller li img').items()
        ]
        first_src = image_box('.scroller li img:first').attr('src')
        item['img'] = first_src.replace('/productthumbs/', '/zoom/')
        item['imgs'] = gallery

        # Sizes
        item['sizes'] = self.get_sizes(product_area)

        # HTTP status, sale status and final URL
        item['status_code'] = http_status
        item['status'] = self.cfg.STATUS_SALE
        item['backUrl'] = response.url

        # Remote IP and port, when the raw response carries a peer value.
        peer = response.raw._original_response.peer
        if peer:
            item['ip_port'] = ':'.join(str(part) for part in peer)

        self.logger.info(json.dumps(
            dict(time=time.time(),
                 productId=item['productId'],
                 name=item['name'],
                 currency=item['currency'],
                 price=item['price'],
                 listPrice=item['listPrice'],
                 url=url)))

        return tool.return_data(successful=True, data=item)
Пример #30
0
    def detail(self, url):
        """Fetch the product page at ``url`` and return its scraped details.

        A 404 response is reported as an off-shelf product and any other
        non-200 response as an error; in both cases the page title is logged
        and ``tool.return_data(successful=False, ...)`` is returned. For a
        200 response the brand, name, price, color, images, product id,
        sizes and description are read from the page, a summary line is
        logged and ``tool.return_data(successful=True, ...)`` is returned.
        """
        response = self.session.get(url, verify=False)
        http_status = response.status_code
        # An empty body is replaced by the literal 'nothing' before parsing.
        page = PyQuery(response.text or 'nothing')

        # Every non-200 answer is logged the same way and reported as a failure.
        if http_status != 200:
            failure_log = json.dumps(
                dict(time=time.time(), title=page('title').text(), url=url))
            self.logger.info(failure_log)

            if http_status == 404:
                # Product is off the shelf.
                failure = tool.get_off_shelf(
                    code=http_status,
                    message=self.cfg.SOLD_OUT,
                    backUrl=response.url,
                    html=page.outerHtml())
            else:
                # Any other HTTP error.
                failure = tool.get_error(
                    code=http_status,
                    message=self.cfg.GET_ERR.get('SCERR', 'ERROR'),
                    backUrl=response.url,
                    html=page.outerHtml())

            return tool.return_data(successful=False, data=failure)

        # Preparation: remember which domain this page came from.
        self.domain = tool.get_domain(url)

        product = dict()

        # Brand, with a fixed fallback when the page yields an empty one.
        brand_text = self.getBrandByHtml(page).strip()
        product['brand'] = brand_text or 'MYGEEK'

        # Name
        title_node = page('span.title strong')
        product['name'] = title_node.text().strip()

        # Currency is hard-coded for this site.
        currency_code = 'CNY'
        product['currency'] = currency_code
        product['currencySymbol'] = tool.get_unit(currency_code)

        # Prices
        sale_price, original_price = self.getPriceByHtml(page)
        product['price'] = sale_price
        product['listPrice'] = original_price

        # Color: a dict of colors maps every key onto itself and also
        # provides the sku keys; anything else gets the default color sku.
        colors = self.get_color(page)
        product['color'] = colors
        if isinstance(colors, dict):
            product['colorId'] = {k: k for k in colors.keys()}
            product['keys'] = colors.keys()
        else:
            product['colorId'] = self.cfg.DEFAULT_COLOR_SKU

        # Image set; the first image doubles as the main image.
        images = self.getImgsByHtml(page)
        product['img'] = images[0]
        product['imgs'] = images

        # Product id, taken from the "id=" part of the first form's action.
        form_action = page('div.pid5 form:first').attr('action')
        id_match = re.search(r'id=(\d*)', form_action)
        product['productId'] = id_match.groups()[0]

        # Sizes
        product['sizes'] = self.getSizesByHtml(page)

        # Description: text of both sections with titles and scripts removed.
        first_section = page('#pid1_2').remove('.title').remove('script')
        first_text = first_section.text()
        second_section = page('.pid2').remove('.title').remove('script')
        product['descr'] = first_text + second_section.text()

        # HTTP status code
        product['status_code'] = http_status

        # Sale status
        product['status'] = self.cfg.STATUS_SALE

        # Final URL reported by the response
        product['backUrl'] = response.url

        summary_log = json.dumps(
            dict(time=time.time(),
                 productId=product['productId'],
                 name=product['name'],
                 currency=product['currency'],
                 price=product['price'],
                 listPrice=product['listPrice'],
                 url=url))
        self.logger.info(summary_log)

        return tool.return_data(successful=True, data=product)