示例#1
0
 def _populate_buyer_reviews(self, response, product):
     """Scrape the star histogram and store ``buyer_reviews`` on ``product``.

     Any ``buyer_reviews`` value already present is removed first. The
     total review count is read from the page, the CSS width (a
     percentage) of each histogram bar is converted into an absolute
     count, bars being assigned to star levels from five downwards, and
     the average rating is derived from those counts.
     ``ZERO_REVIEWS_VALUE`` is stored when the page has no review section,
     no usable counter, or a counter of zero.

     :param response: page response exposing ``xpath``.
     :param product: product item, updated in place.
     """
     if "buyer_reviews" in product:
         del product['buyer_reviews']
     revs = response.xpath('//div[@id="reviews"]/div[@id="reviews"]')
     if not revs:
         product['buyer_reviews'] = ZERO_REVIEWS_VALUE
         return
     total = response.xpath(
         '//div[@class="_Ape"]/div/div/div[@class="_wpe"]/text()').extract()
     # Keep only the first number of the counter text (one thousands
     # separator is tolerated by the pattern).
     total = re.findall(r"\d*,?\d+", total[0]) if total else []
     total = int(total[0].replace(',', '')) if total else 0
     if not total:
         # Missing counter, counter without digits, or an explicit zero:
         # there is nothing to average (and nothing to divide by).
         cond_set_value(product, 'buyer_reviews', ZERO_REVIEWS_VALUE)
         return
     # Two alternative locations of the histogram bars; the union keeps
     # each matching @style attribute once.
     reviews = response.xpath(
         '//div[@id="reviews"]/div[@id="reviews"]//div[@class="_Joe"]'
         '/div/a/div[@class="_Roe"]/@style |'
         '//div[@id="reviews"]//a/div[@class="_Roe"]/@style').extract()
     star = 5
     by_star = {}
     for rev in reviews:
         percents = re.findall(r"width:(\d+\.?\d*)\%", rev)[0]
         # The bar width is the share of all reviews with this rating.
         by_star[star] = int(round(total * float(percents) / 100))
         star -= 1
     weighted = sum(level * count for level, count in by_star.items())
     avg = float(weighted) / total
     reviews = BuyerReviews(num_of_reviews=total,
                            average_rating=round(avg, 1),
                            rating_by_star=by_star)
     cond_set_value(product, 'buyer_reviews', reviews)
示例#2
0
    def _parse_buyer_review(response, product_response):
        """Assemble a ``BuyerReviews`` tuple from two responses.

        The review count, the average rating and a first star histogram
        are read from ``product_response``; a second histogram found in
        ``response`` is added on top of the first. Count and average are
        passed on exactly as extracted (no numeric conversion).

        Raises ``IndexError`` when the count or average node is missing
        and ``KeyError`` when a histogram label is not '1'..'5'.
        """
        def first_content(itemprop):
            query = '//*[@itemprop="%s"]/@content' % itemprop
            return product_response.xpath(query).extract()[0]

        num_reviews = first_content('reviewCount')
        average_rating = first_content('ratingValue')

        rating_by_star = dict.fromkeys(('1', '2', '3', '4', '5'), 0)

        def accumulate(labels, counts):
            # Labels and counts are paired positionally.
            for label, count in zip(labels, map(int, counts)):
                rating_by_star[label] += count

        histogram = '//*[@class="pr-ratings-histogram-content"]'
        accumulate(
            product_response.xpath(
                histogram + '//p[@class="pr-histogram-label"]//span/text()'
            ).re(r'\d+'),
            product_response.xpath(
                histogram + '//p[@class="pr-histogram-count"]//span/text()'
            ).re(r'\d+'))

        graphic_text = response.xpath(
            '//*[@class="pr-info-graphic-amazon"]//dd/text()')
        accumulate(graphic_text.re(r'(\d+) star'),
                   graphic_text.re(r'\((\d+)\)'))

        buyer_reviews = BuyerReviews(num_of_reviews=num_reviews,
                                     average_rating=average_rating,
                                     rating_by_star=rating_by_star)
        return buyer_reviews or None
示例#3
0
 def _request_buyer_reviews(self, response):
     """Tally on-page reviews, or build the URL where reviews can be fetched.

     When the page contains ``reevooReview`` blocks, the score of each
     block is counted and the result is stored in
     ``response.meta['product']['buyer_reviews']``; nothing is returned.
     Blocks without a readable score are ignored, and
     ``ZERO_REVIEWS_VALUE`` is stored when none of them has one.

     Otherwise the SKU is taken from the ``p.productid`` class attribute
     (falling back to the last path segment of the URL) and the formatted
     ``self.REVOO_URL`` is returned.
     """
     anonim_reviews = response.xpath('//div[@class="reevooReview"]')
     if not anonim_reviews:
         sku = response.css('p.productid::attr(class)').re(r'p_(\d+)')
         sku = sku[0] if sku else re.search('.+/([^,]+)',
                                            response.url).group(1)
         return self.REVOO_URL.format(sku=sku)

     stars = {}
     for review in anonim_reviews:
         # The leading "." keeps the query inside this review block; a
         # bare "//" would search the whole document and return the first
         # score on the page for every review.
         score = review.xpath(
             './/div[@class="unverified_stars"]/@title').re(
                 r'Score is (\d+)')
         if score:
             stars[score[0]] = stars.get(score[0], 0) + 1

     total = sum(stars.values())
     if total:
         weighted = sum(int(mark) * count for mark, count in stars.items())
         res = BuyerReviews(num_of_reviews=total,
                            average_rating=float(weighted) / total,
                            rating_by_star=stars)
     else:
         res = ZERO_REVIEWS_VALUE
     response.meta['product']['buyer_reviews'] = res
示例#4
0
    def _parse_buyer_reviews(self, response):
        """Build ``BuyerReviews`` from the page's schema.org review metadata.

        Without a ``reviewCount`` meta value the spider's
        ``ZERO_REVIEWS_VALUE`` mapping is expanded into ``BuyerReviews``.
        Otherwise the count and ``ratingValue`` are converted to numbers
        and the per-review ``ratingValue`` entries under ``#reviews`` are
        tallied by star ('1'..'5'; any other value raises ``KeyError``).
        """
        count = is_empty(
            response.xpath(
                '//meta[@itemprop="reviewCount"]/@content').extract())
        if not count:
            return BuyerReviews(**self.ZERO_REVIEWS_VALUE)

        average = is_empty(
            response.xpath(
                '//meta[@itemprop="ratingValue"]/@content').extract(), 0.0)

        # One entry per individual review listed on the page.
        histogram = dict.fromkeys(('1', '2', '3', '4', '5'), 0)
        marks = response.xpath(
            '//*[@id="reviews"]/.//li/.//meta[@itemprop="ratingValue"]'
            '/@content').extract()
        for mark in marks:
            histogram[mark] += 1

        return BuyerReviews(num_of_reviews=int(count),
                            average_rating=float(average),
                            rating_by_star=histogram)
示例#5
0
    def _parse_prod_info_js(self, response):
        """Handle the ``bvGetReviewSummaries(...)`` JavaScript response.

        The argument of the call is decoded as JSON; when it has a
        non-empty ``Results`` entry, buyer reviews are stored on the
        product and the brand/department parsers are run. Afterwards the
        next queued request is sent if there is one, else the product is
        returned.
        """
        pending = response.meta.get("reqs")
        product = response.meta['product']

        payload = is_empty(
            re.findall(r'bvGetReviewSummaries\((.+)\)',
                       response.body_as_unicode()))
        summary = None
        if payload:
            summary = is_empty(json.loads(payload).get('Results', []))

        if summary:
            reviews = self._parse_buyer_reviews(summary, response)
            product['buyer_reviews'] = BuyerReviews(**reviews)
            self._parse_brand(response, summary)
            self._parse_department(response, summary)

        if not pending:
            return product
        return self.send_next_request(pending, response)
示例#6
0
 def _parse_buyer_reviews(self, response):
     """Collect review scores across pages, then store the summary.

     Scores of the current page are appended to ``meta['scores']``. While
     a ``.next_page`` link exists, a request for it is returned with the
     same meta. On the last page the summary is scraped and
     ``buyer_reviews`` is set on the product, or ``ZERO_REVIEWS_VALUE``
     when the summary is unavailable or reports no reviews.
     """
     meta = response.meta
     collected = meta.get('scores', [])
     page_scores = response.css(
         '.overall_score_stars::attr(title)').extract()
     collected.extend(map(int, page_scores))
     meta['scores'] = collected

     following = response.css('.next_page::attr(href)')
     if following:
         return Request(urljoin(response.url, following[0].extract()),
                        self._parse_buyer_reviews,
                        meta=meta)

     try:
         avg, total = self._scrape_review_summary(response)
     except ValueError:
         avg = total = None
     if not total:
         meta['product']['buyer_reviews'] = ZERO_REVIEWS_VALUE
         return

     by_star = {}
     for score in collected:
         by_star[score] = by_star.get(score, 0) + 1
     meta['product']['buyer_reviews'] = BuyerReviews(
         num_of_reviews=int(total),
         average_rating=float(avg),
         rating_by_star=by_star)
    def parse_buyer_reviews(self, response):
        """Store Bazaarvoice review totals on the product.

        Reads the review counter, the ``avgRating`` value embedded in the
        raw body and up to five histogram labels. ``buyer_reviews``
        becomes a ``BuyerReviews`` tuple when both total and average are
        non-zero, else ``ZERO_REVIEWS_VALUE``. Returns the next queued
        request when ``reqs`` is set, otherwise the product.
        """
        product = response.meta.get("product")
        reqs = response.meta.get("reqs")

        counter = response.xpath(
            "//span[contains(@class, 'BVRRRatingSummaryHeaderCounterValue')]"
            "/text()").re(FLOATING_POINT_RGEX)
        total = int(is_empty(counter, 0))

        average = float(
            is_empty(
                re.findall(r'avgRating"\:(\d+\.\d+)', response.body), 0))

        labels = response.xpath(
            "//span[contains(@class, 'BVRRHistAbsLabel')]/text()"
        ).extract()[:5]
        labels = labels[::-1]

        rating_by_star = {}
        if labels:
            # After reversal, index 0 is used for the one-star count.
            for star in (5, 4, 3, 2, 1):
                cleaned = labels[star - 1]
                for junk in ("\n", "\t", "\\n"):
                    cleaned = cleaned.replace(junk, "")
                rating_by_star[star] = int(cleaned)

        if total and average:
            reviews = BuyerReviews(num_of_reviews=total,
                                   average_rating=average,
                                   rating_by_star=rating_by_star)
        else:
            reviews = ZERO_REVIEWS_VALUE
        product["buyer_reviews"] = reviews

        if not reqs:
            return product
        return self.send_next_request(reqs, response)
示例#8
0
    def _parse_buyer_reviews(self, response):
        """Read the PowerReviews snapshot and return ``BuyerReviews``.

        The average defaults to 0.0 and the review count to 0 when their
        nodes are missing. The histogram is taken from the first
        ``pr-ratings-histogram-content`` list; labels and counts are
        paired positionally.
        """
        avg_nodes = response.xpath(
            '//*[@class="pr-rating pr-rounded average"]/text()').extract()
        avg = 0.0
        if avg_nodes:
            avg = float(avg_nodes[0])

        count_nodes = response.xpath(
            '//*[@class="pr-snapshot-average-based-on-text"]'
            '/span/text()').extract()
        num_reviews = 0
        if count_nodes:
            num_reviews = int(count_nodes[0])

        histogram = '(//ul[@class="pr-ratings-histogram-content"])[1]'
        keys = response.xpath(
            histogram + '//*[@class="pr-histogram-label"]//span/text()'
        ).re(r'(\d+) Stars')
        values = response.xpath(
            histogram + '//*[@class="pr-histogram-count"]/span'
        ).re(r'(\d+)')

        ratings_by_star = dict.fromkeys(('1', '2', '3', '4', '5'), 0)
        ratings_by_star.update(
            (key, int(value)) for key, value in zip(keys, values))

        return BuyerReviews(num_of_reviews=num_reviews,
                            average_rating=avg,
                            rating_by_star=ratings_by_star)
示例#9
0
    def parse_buyer_reviews(self, response):
        """Merge one page of review marks into the running totals.

        The per-star counts returned by ``self.br`` for this page are
        added to ``response.meta['marks']`` and the product's
        ``buyer_reviews`` is rebuilt from them. When requests are already
        queued, a related-products request is appended and the queue is
        processed; otherwise the product is returned.
        """
        meta = response.meta
        page = self.br.parse_buyer_reviews_per_page(response)

        for star, count in page['rating_by_star'].items():
            meta['marks'][star] += count

        product = meta['product']
        reqs = meta.get('reqs')

        product['buyer_reviews'] = BuyerReviews(
            num_of_reviews=page['num_of_reviews'],
            average_rating=page['average_rating'],
            rating_by_star=meta['marks'])

        if not reqs:
            return product

        related_url = self.RELATED_PRODUCT.format(
            product_id=self.product_id, index=0)
        reqs.append(Request(url=related_url,
                            dont_filter=True,
                            callback=self.parse_related_product))
        return self.send_next_request(reqs, response)
示例#10
0
 def _get_buyer_reviews(self, response):
     """Return ``BuyerReviews`` scraped from the ratings tooltip.

     Returns ``None`` when the page has no average score, or when the
     average or the review count cannot be parsed (the latter two cases
     are logged). At most five star-breakdown rows are read, the first
     being assigned to five stars; rows without a readable count are
     left out of ``rating_by_star``.
     """
     average = response.xpath(
         '//*[contains(@class, "average-score")]'
         '[contains(@itemprop, "ratingValue")]//text()').extract()
     if not average:
         return
     try:
         average = float(average[0])
     except ValueError:
         self.log('Invalid buyer reviews at %s' % response.url)
         return
     num = response.xpath(
         '//meta[contains(@itemprop, "reviewCount")]/@content').extract()
     try:
         num = int(num[0].replace(',', ''))
     except (IndexError, ValueError):
         # Missing or non-numeric review count.
         self.log('Invalid buyer reviews at %s' % response.url)
         return
     # scrape rating by star
     rating_by_star = {}
     breakdowns = response.xpath(
         '//*[contains(@id, "ratings-tooltip")]'
         '//*[contains(@class, "star-breakdowns")]'
         '//*[contains(@class, "star-breakdown")]')
     for star_num, star_breakdown in enumerate(breakdowns):
         if star_num >= 5:
             break
         texts = star_breakdown.css('.star-count ::text').extract()
         found = None
         if texts:
             found = re.search(r'(\d+)', texts[0].replace(',', ''))
         if found is None:
             continue
         rating_by_star[str(5 - star_num)] = int(found.group(1))
     return BuyerReviews(num_of_reviews=num,
                         average_rating=average,
                         rating_by_star=rating_by_star)
示例#11
0
    def _parse_review_api(self, response):
        """Fill brand and buyer reviews from the batched review API reply.

        The first ``{...}`` span of the body is decoded as JSON and the
        first entry of ``BatchedResults.q0.Results`` is used. The brand
        name is stored on the product, and ``buyer_reviews`` is built
        from ``ReviewStatistics`` (``ZERO_REVIEWS_VALUE`` when the total
        count is 0). Continues with the queued requests, if any.
        """
        product = response.meta['product']
        reqs = response.meta.get('reqs', [])

        payload = json.loads(re.findall(r'\{.*\}', response.body)[0])
        result = payload['BatchedResults']['q0']['Results'][0]

        product['brand'] = result['Brand']['Name']

        statistics = result['ReviewStatistics']
        by_star = {}
        for entry in statistics['RatingDistribution']:
            by_star[entry['RatingValue']] = entry['Count']

        total = statistics['TotalReviewCount']
        if total == 0:
            reviews = ZERO_REVIEWS_VALUE
        else:
            reviews = BuyerReviews(
                num_of_reviews=total,
                average_rating=round(statistics['AverageOverallRating'], 1),
                rating_by_star=by_star)
        product['buyer_reviews'] = reviews

        if not reqs:
            return product
        return self.send_next_request(reqs, response)
示例#12
0
    def populate_by_star(self, response):
        """Gather review scores over all pages and store the summary.

        Score titles from each page are accumulated in
        ``meta['total_scores']``; while a ``next_page`` link exists a
        request for it is returned. On the last page the scores are
        counted per mark (1..10), the average and total are parsed from
        the ``average_score`` title, and the product is returned with
        ``buyer_reviews`` set.
        """
        # NOTE: counting is done with one list scan per mark; may need
        # optimising for very long score lists.
        total_scores = response.meta.get('total_scores', [])
        total_scores.extend(
            response.xpath(
                '//article[contains(@id, "review")]'
                '//span[contains(@class, "overall_score")]/@title'
            ).extract())

        next_url = response.xpath('//a[@class="next_page"]/@href').extract()
        if next_url:
            meta = response.meta.copy()
            meta['total_scores'] = total_scores
            return Request('http://mark.reevoo.com' + next_url[0],
                           callback=self.populate_by_star,
                           meta=meta)

        stars = {}
        for number in range(1, 11):
            stars[number] = total_scores.count('%s out of 10' % number)

        summary = response.xpath(
            '//div[@class="average_score"]/@title').extract()[0]
        avg = float(re.findall(r'is\s(.*)\sout', summary)[0])
        total = int(re.findall(r'from\s(\d+)\sreview', summary)[0])

        product = response.meta['product']
        product['buyer_reviews'] = (
            BuyerReviews(total, avg, stars) if total else ZERO_REVIEWS_VALUE)
        return product
示例#13
0
    def parse_buyer_reviews(self, response):
        """Merge one page of review marks and queue the related-products call.

        Per-star counts for this page are added to
        ``response.meta['marks']``, ``buyer_reviews`` is rebuilt on the
        product, and a request for related products (carrying a shallow
        copy of the meta) is always appended to the queue before it is
        processed.
        """
        meta = response.meta.copy()
        page = self.br.parse_buyer_reviews_per_page(response)

        for star, count in page['rating_by_star'].items():
            response.meta['marks'][star] += count

        product = response.meta['product']
        reqs = meta.get('reqs', [])

        product['buyer_reviews'] = BuyerReviews(
            num_of_reviews=page['num_of_reviews'],
            average_rating=page['average_rating'],
            rating_by_star=response.meta['marks'])

        # Updated related product url, previous res-x doesn't work
        related_url = self.RELATED_PRODUCT.format(
            product_id=self.product_id + 'US',
            product_categories=self.product_categories,
            product_url=product.get('url'))
        related_request = Request(url=related_url,
                                  dont_filter=True,
                                  callback=self.parse_related_product,
                                  meta=meta)
        reqs.append(related_request)

        return self.send_next_request(reqs, response)
示例#14
0
    def _parse_buyer_reviews(self, response):
        """Return ``BuyerReviews`` read from the ``prod_ratings`` block.

        The average and the review count default to 0.0 and 0 and are
        converted to numbers only when present. The first five histogram
        counts are reversed and assigned to stars 1..5, but only when the
        review count is positive.
        """
        ratings_root = '//div[@id="prod_ratings"]'
        average_rating = is_empty(
            response.xpath(
                ratings_root +
                '//span[@class="pr-rating pr-rounded average"]/text()'
            ).extract(), 0.0)
        num_of_reviews = is_empty(
            response.xpath(
                ratings_root + '//span[@class="count"]/text()'
            ).extract(), 0)
        histogram = response.xpath(
            '//p[@class="pr-histogram-count"]/span/text()').re(r'\d+')[:5]

        average_rating = (float(average_rating)
                          if average_rating else average_rating)
        num_of_reviews = (int(num_of_reviews)
                          if num_of_reviews else num_of_reviews)

        rating_by_star = dict.fromkeys((1, 2, 3, 4, 5), 0)
        if num_of_reviews > 0:
            # Reversed so that the last extracted count lands on one star.
            for star, count in enumerate(histogram[::-1], 1):
                rating_by_star[star] = int(count)

        return BuyerReviews(num_of_reviews, average_rating, rating_by_star)
示例#15
0
    def _parse_buyer_review(self, response):
        """Count per-review star bars and return the review summary.

        Each review's ``style`` attribute is reduced to its first number;
        100/80/60/40/20 map to 5..1 stars and any other number is
        ignored. Returns ``ZERO_REVIEWS_VALUE`` when no style attributes
        are found, else a ``BuyerReviews`` whose average comes from the
        ``ratingValue`` meta tag.
        """
        width_to_star = {'100': 5, '80': 4, '60': 3, '40': 2, '20': 1}
        rating_by_star = dict.fromkeys(('1', '2', '3', '4', '5'), 0)

        stars = response.xpath('//div[@class="box-collateral box-reviews"]'
                               '/dl/dt/div/div/@style').extract()
        counted = 0
        for style in stars:
            width = re.findall(r'(\d+)', style)[0]
            if width in width_to_star:
                rating_by_star[str(width_to_star[width])] += 1
                counted += 1

        average_rating = response.xpath('//meta[@itemprop="ratingValue"]'
                                        '/@content').extract()
        if not stars:
            return ZERO_REVIEWS_VALUE

        return BuyerReviews(num_of_reviews=int(counted),
                            average_rating=float(average_rating[0]),
                            rating_by_star=rating_by_star)
示例#16
0
def populate_reviews(response, reviews):
    """Set ``buyer_reviews`` on the product from a list of user ratings.

    ``reviews`` is expected to hold one rating (a float) per review. An
    empty or missing list leaves the product untouched.
    """
    if not reviews:
        return
    count = len(reviews)
    by_star = {}
    for rating in reviews:
        by_star[rating] = by_star.get(rating, 0) + 1
    summary = BuyerReviews(num_of_reviews=count,
                           average_rating=sum(reviews) / count,
                           rating_by_star=by_star)
    cond_set_value(response.meta['product'], 'buyer_reviews', summary)
示例#17
0
    def _parse_buyer_reviews(self, response):
        """Store buyer reviews parsed from the embedded Bazaarvoice HTML.

        The HTML fragment stored under ``BVRRRatingSummarySourceID`` in
        the raw body is un-escaped and parsed. The average rating is the
        leading number of the overall-rating image title; the per-star
        counts are obtained by counting the ``ratingValue`` texts of the
        reviews present in the fragment, and the review total is their
        sum. ``ZERO_REVIEWS_VALUE`` is stored when the fragment is
        missing or yields no average or no reviews.

        Returns the next queued request when ``reqs`` is non-empty,
        otherwise the product.
        """
        product = response.meta['product']
        reqs = response.meta.get('reqs', [])

        average_rating = 0.0
        rating_by_star = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}

        found = re.search(r'BVRRRatingSummarySourceID":"(.+?)\},',
                          response._body)
        # Without the payload there is nothing to parse; fall through to
        # the zero-reviews branch instead of failing on ``None.group``.
        if found:
            content = found.group(1).replace('\\"', '"')
            content = content.replace("\\/", "/")
            review_html = html.fromstring(content)

            titles = review_html.xpath(
                '//div[contains(@class,"BVRRQuickTakeSection")]'
                '//div[contains(@class,"BVRRRatingOverall")]'
                '//img[contains(@class,"BVImgOrSprite")]/@title')
            if titles:
                average_rating = float(titles[0].strip().split(" ")[0])

            marks = review_html.xpath(
                '//div[contains(@class,"BVRRReviewDisplayStyle5")]'
                '//div[contains(@class,"BVRRReviewDisplayStyle5Header")]'
                '//span[@itemprop="ratingValue"]//text()')
            for star in rating_by_star:
                rating_by_star[star] = marks.count(str(star))

        # Only marks equal to '1'..'5' are counted as reviews.
        num_of_reviews = sum(rating_by_star.values())

        if average_rating and num_of_reviews:
            product["buyer_reviews"] = BuyerReviews(
                num_of_reviews=int(num_of_reviews),
                average_rating=float(average_rating),
                rating_by_star=rating_by_star,
            )
        else:
            product["buyer_reviews"] = ZERO_REVIEWS_VALUE

        if reqs:
            return self.send_next_request(reqs, response)

        return product
 def _parse_bv(self, response):
     """Parse the Bazaarvoice ``materials`` payload into buyer reviews.

     For a 200 response containing ``var materials=...``, the rating
     summary HTML is parsed for the review total and the star histogram,
     while the average is read from ``"avgRating"`` in the raw text
     (0.0 when absent or not a number). ``buyer_reviews`` is set when a
     histogram is found, or to ``ZERO_REVIEWS_VALUE`` when there is
     neither a histogram nor a total. The product is always returned.
     """
     product = response.meta['product']
     text = response.body_as_unicode().encode('utf-8')
     if response.status != 200:
         return product

     found = re.search(r"var materials=(.*),\sinitializers=", text,
                       re.M | re.S)
     if not found:
         return product

     jdata = json.loads(found.group(1))
     summary_html = jdata['BVRRRatingSummarySourceID']
     sel = Selector(text=summary_html.encode('utf-8'))

     # Default keeps ``avrg`` bound when the pattern does not match.
     avrg = 0.0
     m = re.search(r'"avgRating":(.*?),', text, re.M)
     if m:
         try:
             avrg = float(m.group(1))
         except ValueError:
             avrg = 0.0

     total = sel.xpath(
         "//div[@class='BVRRHistogram']"
         "/div[@class='BVRRHistogramTitle']"
         "/span[contains(@class,'BVRRNonZeroCount')]"
         "/span[@class='BVRRNumber']/text()").extract()
     try:
         total = int(total[0]) if total else 0
     except ValueError:
         total = 0

     hist = sel.xpath(
         "//div[@class='BVRRHistogram']"
         "/div[@class='BVRRHistogramContent']"
         "/div[contains(@class,'BVRRHistogramBarRow')]")
     distribution = {}
     for ih in hist:
         name = ih.xpath("span/span[@class='BVRRHistStarLabelText']"
                         "/text()").re(r"(\d) star")
         value = ih.xpath(
             "span[@class='BVRRHistAbsLabel']/text()").extract()
         # A row needs both a star label and a count to be usable.
         if not name or not value:
             continue
         try:
             distribution[int(name[0])] = int(value[0])
         except ValueError:
             # Non-numeric count: leave this row out.
             pass

     if distribution:
         reviews = BuyerReviews(total, avrg, distribution)
         cond_set_value(product, 'buyer_reviews', reviews)
     elif not total:
         cond_set_value(product, 'buyer_reviews',
                        ZERO_REVIEWS_VALUE)
     return product
示例#19
0
    def parse_buyer_reviews(self, response):
        """Store buyer reviews on the product and continue the request chain.

        Two sources are tried: a ``temp = (...)`` JSON payload embedded in
        the raw body, and, when that is missing or empty, the rating
        markup of the page. ``product["buyer_reviews"]`` becomes a
        ``BuyerReviews`` tuple when both an average and a total are found,
        otherwise the integer 0.

        Returns the result of ``send_next_request`` when ``reqs`` is set,
        else the product.
        """
        product = response.meta.get("product")
        reqs = response.meta.get("reqs")
        total = 0

        # Capture what follows "temp = (" up to the closing parenthesis.
        # Presumably is_empty returns the first match, or the given
        # default ("") when there is none - TODO confirm.
        rev = is_empty(re.findall("temp\s+=\s+\(([^\)]*)", response.body), "")
        try:
            rev = json.loads(rev)
        except ValueError:
            # Not valid JSON: use the markup instead.
            rev = {}
        if rev:
            # JSON path: the payload's values are summed into the total.
            for v in rev.values():
                total += int(v)

            avg = is_empty(
                response.xpath(
                    "//p[contains(@class, 'ig-heading')]/span/text()").extract(
                    ), 0)
            if avg:
                # Keep the leading run of characters before any "/"
                # (presumably the score of a "score/max" text - verify).
                avg = float(is_empty(re.findall("([^\/]*)", str(avg)), 0))
        else:
            # Markup path: ``rev`` is falsy here (normally the empty dict)
            # and is filled with star -> count entries from the page.
            avg = float(
                is_empty(
                    response.xpath(
                        "//div[contains(@class, 'ratetxt')]/span[1]/text()").
                    re(FLOATING_POINT_RGEX), 0))
            for item in response.xpath("//div[contains(@class, 'row')]/span"):
                star = is_empty(
                    item.xpath("span[1]/text()").re(FLOATING_POINT_RGEX))
                if not star:
                    continue
                rev[star] = is_empty(
                    item.xpath("span[last()]/text()").re(FLOATING_POINT_RGEX))
            # Same extraction on the row containers themselves; an entry
            # found here overwrites the one written by the previous loop.
            for item in response.xpath("//div[contains(@class, 'row')]"):
                star = is_empty(
                    item.xpath("span[1]/text()").re(FLOATING_POINT_RGEX))
                if not star:
                    continue
                rev[star] = is_empty(
                    item.xpath("span[last()]/text()").re(FLOATING_POINT_RGEX))

            for v in rev.values():
                total += int(v)

        if avg and total:
            product["buyer_reviews"] = BuyerReviews(num_of_reviews=total,
                                                    average_rating=avg,
                                                    rating_by_star=rev)
        else:
            # No usable average or total: the literal 0 is stored.
            product["buyer_reviews"] = 0

        if reqs:
            return self.send_next_request(reqs, response)

        return product
示例#20
0
    def _parse_review(self, response):
        """Rebuild ``rating_by_star`` from the individual reviews in the reply.

        The review count and average already stored on the product are
        kept; only the per-star histogram is recomputed from the
        ``Rating`` of every entry in the JSON ``Results`` list.
        """
        prod = response.meta['product']
        num, avg, _ = prod['buyer_reviews']

        histogram = dict.fromkeys((1, 2, 3, 4, 5), 0)
        for entry in json.loads(response.body_as_unicode())['Results']:
            histogram[entry['Rating']] += 1

        prod['buyer_reviews'] = BuyerReviews(num, avg, histogram)
        return prod
示例#21
0
    def parse_buyer_reviews(self, response):
        """Parse the Yotpo review widget returned as JSON.

        The ``result`` HTML of the JSON reply is decoded and scraped for
        the review count, the average rating, the per-star counts (which
        update the ``rating_by_star`` mapping already on the product) and
        the date of the first listed review. Any failure is logged; when
        the body could not be decoded as JSON, the request is queued
        again.

        Returns the next queued request when there is one, otherwise the
        product.
        """
        meta = response.meta.copy()
        product = response.meta['product']
        reqs = meta.get('reqs', [])
        try:
            jsonresponse = json.loads(response.body_as_unicode())
            response_selector = Selector(
                text=self._htmlspecialchars_decode(jsonresponse.get('result')))
            try:
                num_reviews = response_selector.xpath(
                    '//span[@class="font-color-gray based-on"]/text()').re(
                        r'\d+')[0]
            except IndexError:
                num_reviews = 0
            try:
                avg_rating = response_selector.xpath(
                    '//span[@class="yotpo-star-digits"]/text()').extract(
                    )[0].strip()
            except IndexError:
                avg_rating = 0
            # Counts are reversed so that the last one on the page is
            # stored under '1'.
            review_stars = response_selector.xpath(
                '//span[contains(@class, "yotpo-sum-reviews")]/text()').re(
                    r'\((\d+)\)')[::-1]
            stars = product['buyer_reviews'].rating_by_star
            for star_index, star_value in enumerate(review_stars):
                stars[str(star_index + 1)] = star_value
            last_date = response_selector.xpath(
                '//label[contains(@class, "yotpo-review-date")]/text()'
            ).extract()

            product['buyer_reviews'] = BuyerReviews(num_of_reviews=num_reviews,
                                                    average_rating=avg_rating,
                                                    rating_by_star=stars)
            if last_date:
                last_buyer_review_date = datetime.datetime.strptime(
                    last_date[0], '%m/%d/%y')
                product[
                    'last_buyer_review_date'] = last_buyer_review_date.strftime(
                        '%d-%m-%Y')
        except Exception as e:
            # ``Exception`` rather than ``BaseException`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            self.log("Error extracting buyers reviews - {}".format(e), WARNING)
            # Compare against the message text: an exception object does
            # not support ``in`` on Python 3. The text is the one raised
            # by Python 2's ``json`` module for an undecodable body.
            if 'No JSON object could be decoded' in str(e):
                self.log("Repeating buyers reviews request", WARNING)
                reqs.append(
                    Request(response.url,
                            callback=self.get_price_and_stockstatus,
                            meta=meta,
                            dont_filter=True))

        if reqs:
            return self.send_next_request(reqs, response)
        else:
            return product
示例#22
0
    def _no_parse_reviews(self, response):
        """Store an all-zero ``BuyerReviews`` and continue the request chain.

        Returns the next queued request when ``reqs`` is non-empty,
        otherwise the product.
        """
        request_meta = response.request.meta
        product = request_meta['product']
        reqs = request_meta.get('reqs', [])

        empty_histogram = dict.fromkeys(('1', '2', '3', '4', '5'), 0)
        product['buyer_reviews'] = BuyerReviews(
            num_of_reviews=0,
            average_rating=0,
            rating_by_star=empty_histogram)

        if not reqs:
            return product
        return self.send_next_request(reqs, response)
示例#23
0
    def parse_buyer_reviews(self, response):
        """Set ``buyer_reviews`` from the PowerReviews snapshot block.

        The average is read from the ``pr-rating`` span and the number of
        reviews from the ``count`` span of the "based on" text.
        ``ZERO_REVIEWS_VALUE`` is used when either node is missing, is not
        numeric, or the count is zero. No per-star breakdown is scraped,
        so ``rating_by_star`` is an empty list.
        """
        snapshot = '//div[contains(@class, "pr-snapshot-rating")]'
        avg = response.xpath(
            snapshot + '//span[contains(@class, "pr-rating")]/text()'
        ).extract()
        total = response.xpath(
            snapshot +
            '//p[contains(@class, "pr-snapshot-average-based-on-text")]'
            '/span[@class="count"]/text()'
        ).extract()
        ratings = []

        reviews = ZERO_REVIEWS_VALUE
        if avg and total:
            try:
                num_of_reviews = int(total[0].strip().replace(',', ''))
                average_rating = float(avg[0].strip())
            except ValueError:
                num_of_reviews = 0
            if num_of_reviews:
                reviews = BuyerReviews(num_of_reviews, average_rating,
                                       ratings)

        cond_set_value(response.meta['product'], 'buyer_reviews', reviews)
示例#24
0
    def _parse_reviews(self, response, product):
        """Fetch the Bazaarvoice review script and set ``buyer_reviews``.

        The product id is taken from the ``#productId`` input or, failing
        that, from the digits of the "Web ID:" text. The review script is
        downloaded, its ``materials`` JSON decoded, and the review total
        and star histogram are read from the embedded HTML. The average
        is computed from the histogram. ``ZERO_REVIEWS_VALUE`` is offered
        last via ``cond_set_value`` as the fallback for every case in
        which no review data was stored.
        """
        product_id = response.css('#productId::attr(value)').extract()
        if not product_id:
            product_id = response.xpath('//*[contains(@class,"productID")]'
                                        '[contains(text(), "Web ID:")]/text()').extract()
            if product_id:
                product_id = [''.join([c for c in product_id[0] if c.isdigit()])]

        if product_id:  # Reviews
            url = "http://macys.ugc.bazaarvoice.com/7129aa/%s" \
                  "/reviews.djs?format=embeddedhtml" % (product_id[0],)
            # NOTE(review): synchronous HTTP call without a timeout; it
            # blocks the caller until the server answers.
            resp = re.findall("var materials=(.*)", requests.get(url).text)
            if resp:
                # The last character of the captured line is dropped
                # before decoding (presumably a trailing separator).
                data = json.loads(resp[0][0:-1])
                hxs = HtmlXPathSelector(text=data["BVRRSourceID"])

                num_of_reviews = hxs.xpath(
                    '//div[@id="BVRRQuickTakeSummaryID"]'
                    '/div/div/div/div/div/div/div/div/span'
                    '/span[contains(@class, "BVRRNumber")]/text()'
                ).extract()
                labels = []
                if num_of_reviews:
                    num_of_reviews = int(num_of_reviews[0].replace(',', ''))
                    labels = hxs.xpath(
                        '//div/span[@class="BVRRHistAbsLabel"]/text()'
                    ).extract()
                # A complete histogram has one label per star level.
                if len(labels) >= 5:
                    # Reversed so that the last label is stored under 1.
                    counts = [label.replace(',', '')
                              for label in labels[::-1][:5]]
                    # Counts are stored as comma-stripped strings.
                    rating_by_star = dict(enumerate(counts, 1))
                    weights = [int(count) for count in counts]
                    count = sum(weights)
                    if count:
                        review_sum = sum(
                            star * weight
                            for star, weight in enumerate(weights, 1))
                        average_rating = round(
                            float(review_sum) / float(count), 2)
                        br = BuyerReviews(
                            num_of_reviews,
                            average_rating,
                            rating_by_star
                        )
                        cond_set_value(product, 'buyer_reviews', br)
        cond_set_value(product, 'buyer_reviews', ZERO_REVIEWS_VALUE)
示例#25
0
    def parse_buyer_reviews(self, response):
        """Parse one page of reviews via ``self.br`` and store the result.

        ``self.br.br_count`` is primed from ``meta['_br_count']`` before
        parsing. Returns the next queued request when ``reqs`` is
        non-empty, otherwise the product.
        """
        reqs = response.meta['reqs']
        self.br.br_count = response.meta['_br_count']

        page_reviews = self.br.parse_buyer_reviews_per_page(response)
        product = response.meta['product']
        product['buyer_reviews'] = BuyerReviews(**page_reviews)

        if not reqs:
            return product
        return self.send_next_request(reqs, response)
示例#26
0
    def _parse_review(self, response):
        """Store buyer reviews read from the Pluck review dialog.

        The first five attribute labels are taken as the counts for five
        stars down to one star (the first number in each label; a label
        without a number counts as 0). With fewer than five labels the
        histogram stays empty. The review total is the sum of the counts
        and the average is weighted by star. ``ZERO_REVIEWS_VALUE`` is
        stored when there are no reviews.

        Returns the next queued request when ``reqs`` is non-empty,
        otherwise the product.
        """
        product = response.meta['product']
        reqs = response.meta.get('reqs', [])

        arr = response.xpath(
            "//div[contains(@class,'pluck-dialog-middle')]"
            "//span[contains(@class,'pluck-review-full-attributes-name-post')]/text()"
        ).extract()

        rating_by_star = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
        if len(arr) >= 5:
            # Only the first five labels map onto star levels 5..1.
            for offset, mark in enumerate(arr[:5]):
                digits = re.findall(r'\d+', mark)
                if digits:
                    rating_by_star[5 - offset] = int(digits[0])

        num_of_reviews = sum(rating_by_star.values())
        weighted = sum(star * count
                       for star, count in rating_by_star.items())
        average_rating = (float(weighted) / num_of_reviews
                          if num_of_reviews else 0.0)

        if average_rating and num_of_reviews:
            product["buyer_reviews"] = BuyerReviews(
                num_of_reviews=int(num_of_reviews),
                average_rating=float(average_rating),
                rating_by_star=rating_by_star,
            )
        else:
            product["buyer_reviews"] = ZERO_REVIEWS_VALUE

        if reqs:
            return self.send_next_request(reqs, response)

        return product
 def _populate_buyer_reviews(self, response, product):
     """Set ``buyer_reviews`` on the product from the per-review rating bars.

     Every ``#customer-reviews .rating`` element contributes one review;
     the ``width`` found in its style attribute is converted to a whole
     number of stars at 20 width units per star. Nothing is set when no
     rating bar is found.
     """
     css = '#customer-reviews .rating::attr(style)'
     widths = response.css(css).re(r'width:(\d+)')
     if not widths:
         return
     # Explicit floor division keeps the star values integral on any
     # Python version.
     values = [int(width) // 20 for width in widths]
     total = len(values)
     # Force true division: integer division would truncate the average
     # to a whole number of stars.
     avg = float(sum(values)) / total
     # Single pass instead of calling values.count() once per review.
     by_star = {}
     for value in values:
         by_star[value] = by_star.get(value, 0) + 1
     cond_set_value(
         product, 'buyer_reviews',
         BuyerReviews(num_of_reviews=total,
                      average_rating=avg,
                      rating_by_star=by_star))
示例#28
0
    def _parse_reviews(self, response):
        """Collect buyer reviews across the paginated review responses.

        The average rating and the review total are read from the
        ``"attributes"`` JSON embedded in the response body. Individual
        ratings are scraped from the ``materials`` JSON and accumulated in
        the ``all_revs`` meta entry while further pages are requested.
        Once no further page is due, ``buyer_reviews`` is set on the
        product (``ZERO_REVIEWS_VALUE`` when the total is zero or missing).

        Returns a ``Request`` for the next review page while pages remain.
        """
        # Defaults prevent a NameError when the summary JSON is absent
        # from the response; a zero total ends up as ZERO_REVIEWS_VALUE.
        total = 0
        avg = 0.0
        res = re.findall(r'"attributes":(.*),"ciTrackingEnabled"',
                         response.body)
        if res:
            data = json.loads(res[0])
            avg = float(data['avgRating'])
            total = int(data['numReviews'])

        # Ratings gathered on earlier pages travel in the request meta;
        # read them up front so they exist even if this page has no
        # ``materials`` payload.
        all_revs = response.meta.get('all_revs', [])
        materials = re.findall(r'materials=(.*),', response.body)
        if materials:
            data = json.loads(materials[0])
            pattern = (r'itemprop="ratingValue" class="BVRRNumber'
                       r' BVRRRatingNumber">(\d+)<')
            results = re.findall(pattern, data[data.keys()[0]])
            all_revs.extend(results)

        # Count from the accumulated list so ratings from previous pages
        # are kept when the current page contributes none.
        stars = {}
        for number in range(1, 6):
            stars[number] = all_revs.count(str(number))

        # Buyer reviews populated on page by 8, 9-38, 39-68..
        if total > 8:
            page_counter = (total - 9) // 30 + 2
            meta = response.meta.copy()
            previous_page = response.meta.get('page_populated')
            if previous_page:
                page_populated = int(previous_page) + 1
            else:
                page_populated = 2
            meta['page_populated'] = page_populated

            initial_url = response.meta.get('initial_url')
            if not initial_url:
                initial_url = response.url
                meta['initial_url'] = initial_url
            if page_populated <= page_counter:
                meta['all_revs'] = all_revs
                next_page = initial_url + "&page=%s" % page_populated
                return Request(next_page,
                               callback=self._parse_reviews,
                               meta=meta)

        # NOTE(review): nothing is returned after the reviews are stored;
        # confirm how the product is emitted by the caller.
        product = response.meta['product']
        cond_set_value(
            product, 'buyer_reviews',
            BuyerReviews(total, avg, stars) if total else ZERO_REVIEWS_VALUE)
示例#29
0
    def _parse_buyer_reviews(self, response):
        """Set ``buyer_reviews`` on the product from a JSON review summary.

        The summary is read from ``["response"]["bottomline"]`` of the
        decoded response body. When that path is missing, or the summary
        reports a falsy ``total_review``, ``ZERO_REVIEWS_VALUE`` is used.
        """
        product = response.meta['product']
        payload = json.loads(response.body)

        try:
            bottomline = payload["response"]["bottomline"]
        except KeyError:
            cond_set_value(product, 'buyer_reviews', ZERO_REVIEWS_VALUE)
            return

        star_distribution = bottomline['star_distribution']
        average_score = float(bottomline['average_score'])
        review_count = bottomline['total_review']

        if review_count:
            reviews = BuyerReviews(
                review_count, average_score, star_distribution)
        else:
            reviews = ZERO_REVIEWS_VALUE
        cond_set_value(response.meta['product'], 'buyer_reviews', reviews)
示例#30
0
    def _get_stars_by_request(self, response):
        """
        Callback for the request on buyer reviews.

        The response body is JSON whose 'html' entry holds the markup that
        the review count, the average rating and the per-star counts are
        extracted from with regular expressions. Any failure while parsing
        is logged and the product gets ZERO_REVIEWS_VALUE instead.

        Returns the result of ``self.send_next_request`` when the meta
        entry ``reqs`` is non-empty, otherwise the product itself.
        """
        meta = response.meta.copy()
        pending = meta.get('reqs')
        product = meta['product']

        body = response.body_as_unicode()

        try:
            markup = json.loads(body)['html']
            review_count = is_empty(re.findall(
                r'<span id="review_count">\s+(\d+)\s+</span>', markup))
            average = is_empty(re.findall(
                r'itemprop="ratingValue">\s+(\d.\d)\s+</span>', markup))
            histogram_rows = re.findall(
                r'<tr class="histogramrating" data-rating="(\d+)" data-reviewcount="(\d+)">',
                markup
            )
            # Keys stay the strings captured by the regex.
            histogram = {}
            for star, count in histogram_rows:
                histogram[star] = int(count)
            product['buyer_reviews'] = BuyerReviews(
                num_of_reviews=int(review_count),
                average_rating=float(average),
                rating_by_star=histogram
            )
        except Exception as exc:
            message = 'Unable to parse buyer reviews from {url}: {exc}'.format(
                url=product['url'],
                exc=exc
            )
            self.log(message, ERROR)
            product['buyer_reviews'] = ZERO_REVIEWS_VALUE

        if not pending:
            return product
        return self.send_next_request(pending, response)