Exemplo n.º 1
0
    def process(self, html):
        """Extract product entries from an amazon.co.jp search-result page.

        Args:
            html: Page markup to parse. An empty value, ``None`` or the
                literal string ``'None'`` is treated as a failed download.

        Returns:
            A list with one dict per matched result ``<li>``. Each dict only
            carries the fields that were found, out of ``asin``, ``title``,
            ``image``, ``width``, ``height``, ``price``, ``list_price``,
            ``seller_rating``, ``review_count``, ``bestseller_search_index``
            and ``bestseller_browse_node_id``. The list is empty when no
            result row matches.

        Raises:
            SystemExit: When ``html`` is missing (raised by ``sys.exit()``).
        """
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3, unlike the bare print statement.
        if not html or html == 'None':
            print("Can't get them html from https://www.amazon.co.jp")
            sys.exit()

        def strip_text(raw):
            """Return raw without surrounding whitespace."""
            return raw.strip()

        def to_number(raw):
            """Format raw with Model_Processor.formatNumber for "co.jp"."""
            return Model_Processor().formatNumber(raw, "co.jp")

        def to_rating(raw):
            """Format raw with Model_Processor.formatRating for "co.jp"."""
            return Model_Processor().formatRating(raw, "co.jp")

        def grab(item, node, key, query, convert):
            """Store convert(first match of query) under key, if any."""
            try:
                found = node.xpath(query)
                if found:
                    item[key] = convert(found[0])
            except Exception:
                # Best effort: a broken field must not drop the whole item.
                print(key + " error")

        tree = etree.HTML(html)
        data = []

        # Main product list
        nodes = tree.xpath(
            "//*[contains(@class, 's-result-list-parent-container')]/ul/li")
        for node in nodes:
            item = {}

            grab(item, node, 'asin', "@data-asin", strip_text)
            grab(item, node, 'title',
                 "div//a[contains(@class, 's-access-detail-page')]/@title",
                 strip_text)

            # Image; width and height are only looked up when a source exists.
            try:
                sources = node.xpath(
                    "div//img[contains(@class, 's-access-image')]/@src")
                if sources:
                    item['image'] = Model_Processor().formatImage(sources[0])
                    widths = node.xpath(
                        "div//img[contains(@class, 's-access-image')]/@width")
                    if widths:
                        item['width'] = widths[0].strip()
                    heights = node.xpath(
                        "div//img[contains(@class, 's-access-image')]/@height")
                    if heights:
                        item['height'] = heights[0].strip()
            except Exception:
                print("image error")

            grab(item, node, 'price',
                 "div//span[contains(@class, 's-price')]/text()", to_number)
            grab(item, node, 'list_price',
                 "div//span[contains(@class, 'a-text-strike')]/text()",
                 to_number)
            grab(item, node, 'seller_rating',
                 "div//i[contains(@class, 'a-icon-star')]/span/text()",
                 to_rating)
            grab(item, node, 'review_count',
                 "div//div[@class='a-row a-spacing-none']/a/text()",
                 to_number)

            # Bestseller badge: the link path after "/gp/bestsellers/" holds
            # the search index and, when long enough, the browse node id.
            try:
                links = node.xpath(
                    "div//span[contains(@class, 'sx-bestseller')]/a/@href")
                if links:
                    parts = links[0].replace("/gp/bestsellers/",
                                             "").split("/")
                    if len(parts) >= 2:
                        item['bestseller_search_index'] = parts[0]
                    if len(parts) > 2:
                        item['bestseller_browse_node_id'] = parts[1]
            except Exception:
                print("bestseller error")

            data.append(item)
        return data
Exemplo n.º 2
0
    def process(self, html):
        """Extract seller profile data from an amazon.it seller page.

        Args:
            html: Page markup to parse. An empty value, ``None`` or the
                literal string ``'None'`` is treated as a failed download.

        Returns:
            A dict holding whichever of ``logo_url``, ``name``, ``rating``
            and the ``feedback_<type>_<period>`` entries were found, or
            ``False`` when nothing could be extracted. ``<period>`` is one of
            ``thirty_days``, ``ninty_days``, ``twelve_months``, ``lifetime``;
            a cell showing ``-`` is stored as an empty string.

        Raises:
            SystemExit: When ``html`` is missing (raised by ``sys.exit()``).
        """
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3, unlike the bare print statement.
        if not html or html == 'None':
            print("Can't get them html from https://www.amazon.it")
            sys.exit()
        tree = etree.HTML(html)
        data = {}

        # Seller logo
        try:
            logos = tree.xpath("//*[@id='sellerLogo']/@src")
            if logos:
                data['logo_url'] = logos[0]
        except Exception:
            print("sellerLogo error")

        # Seller name
        try:
            names = tree.xpath("//*[@id='sellerName']/text()")
            if names:
                data['name'] = names[0].strip()
        except Exception:
            print("sellerName error")

        # Seller rating
        try:
            ratings = tree.xpath(
                "//*[@id='seller-feedback-summary']//*[@class='a-icon-alt']/text()"
            )
            if ratings:
                data['rating'] = Model_Processor().formatRating(
                    ratings[0], "it")
        except Exception:
            print("seller rating error")

        # Feedback table: column 1 names the feedback type, columns 2-5 hold
        # the figures per period. The key suffixes (including the historic
        # "ninty" spelling) are part of the output contract.
        periods = (
            (2, '_thirty_days'),
            (3, '_ninty_days'),
            (4, '_twelve_months'),
            (5, '_lifetime'),
        )
        try:
            rows = tree.xpath("//*[@id='feedback-summary-table']//tr")
            if rows:
                for row in rows:
                    labels = row.xpath("td[1]/text()")
                    if not labels:
                        continue
                    feedback_type = Model_Processor().formatType(
                        labels[0].strip()).lower()
                    for column, suffix in periods:
                        cells = row.xpath("td[%d]/span/text()" % column)
                        if not cells:
                            continue
                        text = cells[0].strip()
                        key = 'feedback_' + feedback_type + suffix
                        if text != '-':
                            data[key] = Model_Processor().formatNumber(
                                text, "it")
                        else:
                            data[key] = ""
            else:
                print("no feedback")
        except Exception:
            print("feedback info error")

        if data:
            return data

        return False
Exemplo n.º 3
0
    def process(self, html, page_id=1):
        """Extract products, sponsored ads and the result total from a page.

        NOTE(review): the failure message names amazon.com while every
        number/rating is formatted with the "co.jp" locale and the total is
        parsed from Japanese text - confirm which site this targets.

        Args:
            html: Page markup to parse. An empty value, ``None`` or the
                literal string ``'None'`` is treated as a failed download.
            page_id: Value copied into every item's ``page_id`` field.

        Returns:
            A list of dicts, or ``None`` when nothing was collected. Entries
            come in this order: one dict per main result row that has a
            ``data-asin`` attribute, one dict per right-hand sponsored ad,
            and finally, when a total could be parsed, ``{"total": <str>}``.

        Raises:
            SystemExit: When ``html`` is missing (raised by ``sys.exit()``).
        """
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3, unlike the bare print statement.
        if not html or html == 'None':
            print("Can't get them html from https://www.amazon.com")
            sys.exit()

        def to_number(raw):
            """Format raw with Model_Processor.formatNumber for "co.jp"."""
            return Model_Processor().formatNumber(raw, "co.jp")

        def to_rating(raw):
            """Format raw with Model_Processor.formatRating for "co.jp"."""
            return Model_Processor().formatRating(raw, "co.jp")

        def to_image(raw):
            """Format raw with Model_Processor.formatImage."""
            return Model_Processor().formatImage(raw)

        def grab(item, node, key, query, label, convert=None,
                 digits_only=False):
            """Store the (converted) first match of query under key.

            With digits_only the value is kept only if it is all digits.
            Any failure is reported by printing label; the item survives.
            """
            try:
                found = node.xpath(query)
                if not found:
                    return
                value = found[0] if convert is None else convert(found[0])
                if digits_only and not value.isdigit():
                    return
                item[key] = value
            except Exception:
                print(label)

        tree = etree.HTML(html)
        data = []
        # https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=handbags
        try:
            # Main result list
            rows = tree.xpath(
                "//*[@class='a-row s-result-list-parent-container']/ul/li")
            # Bound before the list check because the right-hand ads below
            # continue this counter even when the main list is empty.
            page_position = 1
            if rows:
                sponsor_position = 1
                previous_slot = 'top'
                for row in rows:
                    item = {
                        'sponsor': 0,
                        'page_id': page_id,
                        'page_position': page_position
                    }
                    # Sponsored rows carry one of two marker elements.
                    sponsored = row.xpath(
                        "div//*[contains(@class, 's-sponsored-list-header')]/text()"
                    )
                    if not sponsored:
                        sponsored = row.xpath(
                            "div//h5[contains(@class, 'a-color-tertiary')]/text()"
                        )
                    if sponsored:
                        item['sponsor'] = 1
                        if page_position <= 4:
                            slot = 'top'
                        else:
                            slot = 'bottom'
                            # Numbering restarts on the first bottom ad.
                            if slot != previous_slot:
                                sponsor_position = 1
                                previous_slot = 'bottom'
                        item['sponsor_position_type'] = slot
                        item['sponsor_position'] = sponsor_position
                        sponsor_position += 1
                    page_position += 1

                    # asin; rows without one are skipped entirely
                    try:
                        asin = row.xpath("@data-asin")
                        if not asin:
                            continue
                        item['asin'] = asin[0]
                    except Exception:
                        print("asin error")

                    # Title
                    grab(item, row, 'title',
                         "div//*[contains(@class, 's-access-detail-page')]/h2/text()",
                         "title error")

                    # Image, width, height
                    grab(item, row, 'image',
                         "div//*[contains(@class, 's-access-image')]/@src",
                         "image error", to_image)
                    grab(item, row, 'width',
                         "div//*[contains(@class, 's-access-image')]/@width",
                         "width error")
                    grab(item, row, 'height',
                         "div//*[contains(@class, 's-access-image')]/@height",
                         "height error")

                    # Bestseller badge
                    try:
                        badge = row.xpath(
                            "div//*[contains(@class, 'sx-badge-rectangle')]/span/text()"
                        )
                        if badge and badge[0] == "Best Seller":
                            hrefs = row.xpath(
                                "div//*[contains(@class, 'sx-badge-region')]/div/a/@href"
                            )
                            if hrefs:
                                tail = hrefs[0].strip().split(
                                    "bestsellers/")[1].strip()
                                node_id = tail.split("/")[1].strip()
                                if node_id.isdigit():
                                    item['bestseller_node_id'] = node_id
                    except Exception:
                        print("bestseller error")

                    # fba: "1" only when the prime icon text is "Prime"
                    try:
                        prime = row.xpath(
                            "div//*[contains(@class, 'a-icon-prime')]/span/text()"
                        )
                        if prime and prime[0].strip() == "Prime":
                            item['is_fba'] = "1"
                        else:
                            item['is_fba'] = "0"
                    except Exception:
                        print("fba error")

                    # Price: the first query that matches anything decides;
                    # later queries are only tried when earlier ones are empty.
                    try:
                        for query in (
                                "div//*[contains(@class, 'a-color-base')]/@aria-label",
                                "div//*[contains(@class, 'a-color-base')]/text()",
                                "div//*[contains(@class, 's-price')]/text()"):
                            found = row.xpath(query)
                            if found:
                                value = to_number(found[0])
                                if value.isdigit():
                                    item['price'] = value
                                break
                    except Exception:
                        print("price error")

                    # List price
                    grab(item, row, 'list_price',
                         "div//*[contains(@class, 'a-text-strike')]/text()",
                         "list_price error", to_number, digits_only=True)

                    # Rating
                    grab(item, row, 'rating',
                         "div//*[contains(@class, 'a-icon-star')]/span/text()",
                         "rating error", to_rating)

                    # Review count, with a fallback layout
                    try:
                        counts = row.xpath(
                            "div//*[contains(@class, 'a-span5')]/div/a/text()")
                        if counts:
                            count = to_number(counts[0])
                            if count.isdigit():
                                item['review_count'] = count
                        else:
                            counts = row.xpath(
                                "div//*[contains(@class, 'a-row a-spacing-none')]/a/text()"
                            )
                            for text in counts:
                                cleaned = text.replace(",", "").strip()
                                if cleaned.isdigit():
                                    count = to_number(cleaned)
                                    if count.isdigit():
                                        item['review_count'] = count
                    except Exception:
                        print("review_count error")
                    data.append(item)

            # Right-hand sponsored ads
            try:
                heading = tree.xpath("//*[@id='paRightContent']//h1/text()")
                if heading and heading[0].strip() == "Sponsored":
                    ads = tree.xpath(
                        "//*[@id='paRightContent']//*[contains(@class, 'pa-ad-details')]"
                    )
                    sponsor_position = 1
                    for ad in ads:
                        item = {
                            'sponsor': 1,
                            'sponsor_position_type': 'right',
                            'page_id': page_id,
                            'page_position': page_position,
                            'sponsor_position': sponsor_position
                        }
                        page_position += 1
                        sponsor_position += 1

                        # Ad ASIN: fifth "%2F"-separated piece of the link;
                        # the last usable link wins. Short links are skipped
                        # instead of aborting every remaining ad.
                        links = ad.xpath("div/a/@href")
                        if links:
                            for link in links:
                                pieces = link.split("%2F")
                                if len(pieces) > 4:
                                    item['asin'] = pieces[4]
                            # NOTE(review): the ['a-section'] predicate below
                            # is a bare string, so it probably does not
                            # filter by class - confirm intent.
                            if not item.get('asin'):
                                # Fallback: read the ASIN (and the raw review
                                # text) from the anchor elements themselves.
                                for anchor in ad.xpath(
                                        "div/div['a-section']/a"):
                                    if anchor.text:
                                        item['review_count'] = anchor.text
                                        href = anchor.xpath("@href")
                                        pieces = (href[0].split("/")
                                                  if href else [])
                                        if len(pieces) > 4:
                                            item['asin'] = pieces[4]
                            else:
                                # Review count
                                counts = ad.xpath(
                                    "div/div['a-section']/a[2]/text()")
                                if counts:
                                    item['review_count'] = to_number(
                                        counts[0])

                        # Ad image, title, price, list price, rating
                        grab(item, ad, 'image',
                             "div//*[@alt='Product Details']/@src",
                             "ad image error", to_image)
                        grab(item, ad, 'title',
                             "div['a-section']/div/a/@title",
                             "ad title error")
                        grab(item, ad, 'price', "div//*/@aria-label",
                             "ad price error", to_number, digits_only=True)
                        grab(item, ad, 'list_price',
                             "div//*[contains(@class, 'a-text-strike')]/text()",
                             "ad list_price error", to_number,
                             digits_only=True)
                        grab(item, ad, 'rating',
                             "div//*[contains(@class, 'a-icon-star')]/span/text()",
                             "ad rating error", to_rating)
                        data.append(item)
            except Exception:
                print("right ad error")

            # Result total
            summary = tree.xpath("//*[@id='s-result-count']/text()")
            if summary:
                text = summary[0]
                total = None
                # More than one result: the count is the second
                # space-separated word.
                words = text.split(" ")
                if len(words) > 1:
                    candidate = to_number(words[1].replace("件中", ""))
                    if candidate.isdigit():
                        total = candidate
                if total is None:
                    # Exactly one result: the text is a single word, which
                    # is why the word count is checked above.
                    candidate = to_number(text.replace("件の結果", ""))
                    if candidate.isdigit():
                        total = candidate
                if total is not None:
                    data.append({"total": total})
            else:
                # No results at all
                spans = tree.xpath("//*[@id='noResultsTitle']/span/text()")
                if (len(spans) > 2 and spans[1] == "0"
                        and spans[2] == "検索結果"):
                    print(spans[1])
                    print(spans[2])
                    data.append({"total": "0"})
        except Exception as err:
            print(err)
        if data:
            return data
        return None
Exemplo n.º 4
0
    def process(self, html, begin, end):
        """Extract top-reviewer rows ``#reviewer<begin>`` .. ``#reviewer<end-1>``.

        Args:
            html: Page markup to parse. An empty value, ``None`` or the
                literal string ``'None'`` is treated as a failed download.
            begin: First reviewer index to look up (inclusive).
            end: Reviewer index at which to stop (exclusive).

        Returns:
            A list with one dict per reviewer row that yielded at least one
            field, or ``False`` when no row yielded anything. Possible keys:
            ``rank``, ``profile_image_url``, ``top_reviewer_id``, ``name``,
            ``review_count``, ``helpful_vote_count``, ``helpful_vote_ratio``.

        Raises:
            SystemExit: When ``html`` is missing (raised by ``sys.exit()``).
        """
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3, unlike the bare print statement.
        if not html or html == 'None':
            print("Can't get them html from https://www.amazon.it")
            sys.exit()

        def to_number(raw):
            """Format raw with Model_Processor.formatNumber for "it"."""
            return Model_Processor().formatNumber(raw, "it")

        def to_image(raw):
            """Format raw with Model_Processor.formatImage."""
            return Model_Processor().formatImage(raw)

        def reviewer_id(href):
            """Return the fifth "/"-separated piece of the profile link."""
            return href.split("/")[4].strip()

        def strip_text(raw):
            """Return raw without surrounding whitespace."""
            return raw.strip()

        # (output key, XPath relative to the reviewer row, converter)
        fields = (
            ('rank', "td[1]/text()", to_number),
            ('profile_image_url', "td[@class='img']/a/img/@src", to_image),
            ('top_reviewer_id', "td[3]/a/@href", reviewer_id),
            ('name', "td[3]/a/b/text()", strip_text),
            ('review_count', "td[4]/text()", to_number),
            ('helpful_vote_count', "td[5]/text()", to_number),
            ('helpful_vote_ratio', "td[6]/text()", to_number),
        )

        tree = etree.HTML(html)
        data = []
        for i in range(begin, end):
            matches = tree.xpath("//*[@id='reviewer" + str(i) + "']")
            if not matches:
                print("Pattern Mismatch: Dom [#reviewer" + str(i) +
                      "] not found.")
                continue

            row = matches[0]
            item = {}
            for key, query, convert in fields:
                try:
                    found = row.xpath(query)
                    if found:
                        item[key] = convert(found[0])
                except Exception as err:
                    # Best effort per field; report and keep the others.
                    print(err)

            if item:
                data.append(item)

        if data:
            return data

        print("No available top reviewer data found.")
        return False
Exemplo n.º 5
0
    def _store_first(self, node, query, item, key, label, convert=None):
        """Copy the first match of an XPath query into ``item[key]``.

        Args:
            node: element exposing ``xpath()``.
            query: XPath expression evaluated relative to ``node``.
            item: dict that receives the value.
            key: dict key to write.
            label: prefix of the "<label> error" message printed on failure.
            convert: optional callable applied to the first match before it
                is stored.

        Nothing is stored when the query has no match. Any exception is
        reported on stdout and swallowed so the remaining fields are still
        parsed.
        """
        try:
            found = node.xpath(query)
            if found:
                item[key] = found[0] if convert is None else convert(found[0])
        except Exception:
            print(label + " error")

    def _store_first_digits(self, node, queries, item, key, label):
        """Store a digits-only number taken from the first matching query.

        The queries are tried in order and the first one that matches
        anything is decisive: its first match is passed through
        ``Model_Processor().formatNumber(value, "fr")`` and stored only when
        the result satisfies ``isdigit()``. Later queries are not consulted
        once one has matched, even if the value is rejected.
        """
        try:
            for query in queries:
                found = node.xpath(query)
                if found:
                    # Format once and reuse the result for check and store.
                    value = Model_Processor().formatNumber(found[0], "fr")
                    if value.isdigit():
                        item[key] = value
                    break
        except Exception:
            print(label + " error")

    def _parse_result_item(self, itemDom, item):
        """Fill ``item`` with the fields of one organic search-result ``<li>``.

        Returns:
            False when the element has no ``data-asin`` attribute (the caller
            skips such entries), True otherwise.
        """
        try:
            asin = itemDom.xpath("@data-asin")
        except Exception:
            print("asin error")
        else:
            if not asin:
                return False
            item['asin'] = asin[0]

        # Title
        self._store_first(
            itemDom, "div//*[contains(@class, 's-access-detail-page')]/h2/text()",
            item, 'title', "title")

        # Image URL, width and height
        self._store_first(
            itemDom, "div//*[contains(@class, 's-access-image')]/@src",
            item, 'image', "image",
            lambda src: Model_Processor().formatImage(src))
        self._store_first(
            itemDom, "div//*[contains(@class, 's-access-image')]/@width",
            item, 'width', "width")
        self._store_first(
            itemDom, "div//*[contains(@class, 's-access-image')]/@height",
            item, 'height', "height")

        # Best-seller badge: the category node id is the second path segment
        # after "bestsellers/" in the badge link.
        # NOTE(review): the badge text is compared with the English
        # "Best Seller" although this parser targets amazon.fr - confirm.
        try:
            badge = itemDom.xpath("div//*[contains(@class, 'sx-badge-rectangle')]/span/text()")
            if badge and badge[0] == "Best Seller":
                badge_link = itemDom.xpath("div//*[contains(@class, 'sx-badge-region')]/div/a/@href")
                if badge_link:
                    bestseller_id = badge_link[0].strip().split("bestsellers/")[1].strip().split("/")[1].strip()
                    if bestseller_id.isdigit():
                        item['bestseller_node_id'] = bestseller_id
        except Exception:
            print("bestseller error")

        # FBA flag, stored as the strings "1" / "0".
        # NOTE(review): "Écran" looks like an odd label for a Prime/Premium
        # icon - verify against a live page.
        try:
            fba = itemDom.xpath("div//*[contains(@class, 'a-icon-premium')]/span/text()")
            if fba and fba[0].strip() == "Écran":
                item['is_fba'] = "1"
            else:
                item['is_fba'] = "0"
        except Exception:
            print("fba error")

        # Current price, with two fallback locations
        self._store_first_digits(
            itemDom,
            ("div//*[contains(@class, 'a-color-base')]/@aria-label",
             "div//*[contains(@class, 'a-color-base')]/text()",
             "div//*[contains(@class, 's-price')]/text()"),
            item, 'price', "price")

        # Struck-through list price
        self._store_first_digits(
            itemDom, ("div//*[contains(@class, 'a-text-strike')]/text()",),
            item, 'list_price', "list_price")

        # Star rating
        self._store_first(
            itemDom, "div//i[contains(@class, 'a-icon-star')]/span/text()",
            item, 'rating', "rating",
            lambda text: Model_Processor().formatRating(text, "fr"))

        # Review count, with one fallback location
        self._store_first_digits(
            itemDom,
            ("div//*[contains(@class, 'a-span5')]/div/a/text()",
             "div//*[contains(@class, 'a-spacing-top-mini')]/a/text()"),
            item, 'review_count', "review_count")
        return True

    def _parse_right_ad(self, itemDom, item):
        """Fill ``item`` with the fields of one right-column sponsored ad.

        Exceptions raised while extracting the ASIN are not caught here; they
        propagate to the caller's "right ad error" handler.
        """
        # ASIN: fifth "%2F"-separated segment of the ad link (last link wins).
        linkDoms = itemDom.xpath("div/a/@href")
        if linkDoms:
            for linkDom in linkDoms:
                item['asin'] = linkDom.split("%2F")[4]
            if not item['asin']:
                # Fallback: read the ASIN and review count from the anchors.
                # The original dereferenced the href string of the loop above
                # (``linkDom.text``), which raised AttributeError and aborted
                # the whole right column; iterate the anchor elements instead.
                # NOTE(review): the predicate ['a-section'] is a string
                # literal, hence always true; a class test was probably
                # intended - confirm before changing the query.
                for anchorDom in itemDom.xpath("div/div['a-section']/a"):
                    if anchorDom.text:
                        item['review_count'] = anchorDom.text
                        item['asin'] = anchorDom.xpath("@href")[0].split("/")[4]
                        break
            else:
                # Review count
                review_countDom = itemDom.xpath("div/div['a-section']/a[2]/text()")
                if review_countDom:
                    item['review_count'] = Model_Processor().formatNumber(review_countDom[0], "fr")

        # Ad image
        self._store_first(
            itemDom, "div//*[@alt='Product Details']/@src",
            item, 'image', "ad image",
            lambda src: Model_Processor().formatImage(src))
        # Ad title
        self._store_first(
            itemDom, "div['a-section']/div/a/@title",
            item, 'title', "ad title")
        # Ad price
        self._store_first_digits(
            itemDom, ("div//*/@aria-label",),
            item, 'price', "ad price")
        # Ad list price
        self._store_first_digits(
            itemDom, ("div//*[contains(@class, 'a-text-strike')]/text()",),
            item, 'list_price', "ad list_price")
        # Ad rating
        self._store_first(
            itemDom, "div//*[contains(@class, 'a-icon-star')]/span/text()",
            item, 'rating', "ad rating",
            lambda text: Model_Processor().formatRating(text, "fr"))

    def _parse_total(self, tree):
        """Return the total result count as a digit string, or None.

        Reads ``#s-result-count`` first (the text after "sur" and before
        "résultats", falling back to the first space-separated token), then
        the "0 résultats" pair of spans under ``#noResultsTitle``.
        """
        totalDom = tree.xpath("//*[@id='s-result-count']/text()")
        if totalDom:
            # More than one result: "... sur N résultats ..."
            try:
                total = Model_Processor().formatNumber(
                    totalDom[0].split("sur")[1].strip().split("résultats")[0].replace(" ", "").strip(), "fr")
            except Exception:
                total = Model_Processor().formatNumber(totalDom[0].split(" ")[0], "fr")
            total = str(total).replace("résultat", "").replace(" ", "")
            if total.isdigit():
                return total
            # Exactly one result: the count is the first token
            total = Model_Processor().formatNumber(totalDom[0].split(" ")[0], "fr")
            if total.isdigit():
                return total
            return None

        # No results at all
        totalDom = tree.xpath("//*[@id='noResultsTitle']//span/text()")
        # Length guard: a single span used to raise IndexError here.
        if len(totalDom) > 1 and totalDom[0] == "0" and totalDom[1] == "résultats":
            print(totalDom[0])
            print(totalDom[1])
            return "0"
        return None

    def process(self, html, page_id=1):
        """Parse an amazon.fr search-result page into a list of dicts.

        Args:
            html: page markup. The empty string and the literal string
                'None' are treated as a failed download: a message is printed
                and ``sys.exit()`` is called.
            page_id: value stored under 'page_id' in every product entry.

        Returns:
            The list ``data`` when at least one entry was collected. It holds
            one dict per organic result that carries a ``data-asin``
            attribute, one dict per right-column sponsored ad, and, when a
            count could be read, a trailing ``{"total": "<digits>"}`` dict.
            When nothing was collected, execution falls through the final
            check shown here without returning the list.

        Parsing is best effort: field-level failures print "<field> error",
        and any other exception is printed and ends parsing early while
        keeping the entries gathered so far.
        """
        if html == '' or html == 'None':
            print("Can't get them html from https://www.amazon.fr")
            sys.exit()

        tree = etree.HTML(html)
        data = []
        # Defined before the main list is inspected: the right-column ads
        # continue this counter, and previously hit a NameError (reported as
        # "right ad error") whenever the page had no main result list.
        page_position = 1
        try:
            # Organic / in-line sponsored results
            listDoms = tree.xpath("//*[@class='a-row s-result-list-parent-container']/ul/li")
            sponsor_position = 1
            previous_sponsor_position_type = 'top'
            for itemDom in listDoms:
                item = {'sponsor': 0, 'page_id': page_id, 'page_position': page_position}
                # An entry is sponsored when either header variant is present.
                sponsorDom = (
                    itemDom.xpath("div//*[contains(@class, 's-sponsored-list-header')]/text()")
                    or itemDom.xpath("div//h5[contains(@class, 'a-color-tertiary')]/text()"))
                if sponsorDom:
                    item['sponsor'] = 1
                    if page_position <= 4:
                        sponsor_position_type = 'top'
                    else:
                        sponsor_position_type = 'bottom'
                        # Numbering restarts at the first bottom sponsor.
                        if sponsor_position_type != previous_sponsor_position_type:
                            sponsor_position = 1
                            previous_sponsor_position_type = 'bottom'
                    item['sponsor_position_type'] = sponsor_position_type
                    item['sponsor_position'] = sponsor_position
                    sponsor_position += 1
                # Counters advance even for entries that are skipped below.
                page_position += 1

                if self._parse_result_item(itemDom, item):
                    data.append(item)

            # Right-column sponsored ads
            try:
                rightTitleDom = tree.xpath("//*[@id='paRightContent']//h1/text()")
                if rightTitleDom and rightTitleDom[0].strip() == "Sponsored":
                    rightListDom = tree.xpath("//*[@id='paRightContent']//*[contains(@class, 'pa-ad-details')]")
                    ad_position = 1
                    for adDom in rightListDom:
                        item = {'sponsor': 1, 'sponsor_position_type': 'right', 'page_id': page_id,
                                'page_position': page_position, 'sponsor_position': ad_position}
                        page_position += 1
                        ad_position += 1
                        self._parse_right_ad(adDom, item)
                        data.append(item)
            except Exception:
                print("right ad error")

            # Total number of results
            total = self._parse_total(tree)
            if total is not None:
                data.append({"total": total})
        except Exception as err:
            print(err)

        if len(data) > 0:
            return data