示例#1
0
def mainFunction(product, maxExtraPages=2):
    """Collect reviews for ``product`` from every configured 'amazon' site.

    For each site entry returned by ``xmlToDict.readXML('amazon')`` the
    function:

    1. fetches ``site['searchURL'] + product`` and takes the first link
       matched by ``site['searchTags']`` as the product page;
    2. follows the first link matched by ``site['allReviewTags']`` when that
       setting is non-empty, otherwise reuses the first product link;
    3. reads the review page and then up to ``maxExtraPages`` further pages,
       each time following the *last* link matched by
       ``site['reviewNextPage']``;
    4. turns the scraped headings, texts, upvote strings and star strings
       into one dict per review.

    Args:
        product: Search text appended verbatim to the site's search URL, e.g.
            'Dell Inspiron 15 3521 Laptop (3rd Gen Ci3/ 4GB/ 500GB/ Win8)'.
            NOTE(review): the text is not URL-escaped here; presumably
            ``readHTML`` copes with that - confirm.
        maxExtraPages: Maximum number of review pages to read after the
            first one. Defaults to 2, the limit that was previously
            hard-coded.

    Returns:
        A list of dicts with the keys ``'heading'``, ``'upvotes'`` (a
        two-item list holding the first and third space-separated tokens of
        the upvote string), ``'stars'`` (first space-separated token of the
        star string) and ``'text'``. Reviews of all sites are concatenated;
        the list is empty when no site yields a product or review link.

    Raises:
        IndexError: If a page yields fewer texts, upvote strings or star
            strings than headings, or an upvote string has fewer than three
            space-separated tokens.
    """
    # Sentences mentioning the shop or the shipping are dropped from the
    # review text; the match is a case-sensitive substring test.
    blockedWords = ('Amazon', 'amazon', 'delivered', 'delivery')

    # Created before the loop so that an empty site list returns [] instead
    # of failing on an unbound name, and so that a later site does not
    # discard the reviews gathered from an earlier one.
    reviews = []

    for site in xmlToDict.readXML('amazon'):
        prefix = site['prefix']

        searchPage = readHTML(site['searchURL'] + product)
        productLinks = getLinkFromHTML(site['searchTags'], searchPage)
        if not productLinks:
            # The search produced no product link: nothing to scrape here.
            continue

        productPage = readHTML(prefix + productLinks[0])
        if site['allReviewTags']:
            reviewsLink = getLinkFromHTML(site['allReviewTags'], productPage)
        else:
            reviewsLink = productLinks
        if not reviewsLink:
            continue

        headings, texts, upvotes, stars = [], [], [], []
        pageLink = reviewsLink[0]

        # The first page is always read; the remaining iterations are the
        # optional follow-up pages.
        for _ in range(max(maxExtraPages, 0) + 1):
            reviewPage = readHTML(prefix + pageLink)
            headings += getTagFromHTML(site['reviewHeading'], reviewPage)
            texts += getTagFromHTML(site['reviewText'], reviewPage)
            upvotes += getTagFromHTML(site['reviewUpvote'], reviewPage)
            stars += getTagFromHTML(site['reviewStarRating'], reviewPage)

            nextLinks = getLinkFromHTML(site['reviewNextPage'], reviewPage)
            if not nextLinks:
                # No "next" link on this page: stop instead of popping from
                # an empty list.
                break
            pageLink = nextLinks[-1]

        # The four lists are parallel and indexed by heading position.
        for index, heading in enumerate(headings):
            voteTokens = upvotes[index].split(' ')
            keptSentences = [
                sentence
                for sentence in texts[index].split('.')
                if not any(word in sentence for word in blockedWords)
            ]
            reviews.append({
                'heading': heading,
                'upvotes': [voteTokens[0], voteTokens[2]],
                'stars': stars[index].split(' ')[0],
                # Every kept fragment gets its '.' back, exactly as before.
                'text': ''.join(sentence + '.' for sentence in keptSentences),
            })

    return reviews
示例#2
0
def mainFunction(product):
    """Collect reviews for ``product`` from every configured 'flipkart' site.

    For each site entry returned by ``xmlToDict.readXML('flipkart')`` the
    function:

    1. fetches ``site['searchURL'] + product`` and takes the first link
       matched by ``site['searchTags']`` as the product page;
    2. follows the first link matched by ``site['allReviewTags']`` when that
       setting is non-empty, otherwise reuses the first product link;
    3. reads the review page and, when ``site['reviewNextPage']`` matches a
       link, one further page (the first matched link);
    4. turns the scraped headings, texts, upvote entries and star titles
       into one dict per review.

    Args:
        product: Search text appended verbatim to the site's search URL, e.g.
            'Dell Inspiron 15 3521 Laptop (3rd Gen Ci3/ 4GB/ 500GB/ Win8)'.
            NOTE(review): the text is not URL-escaped here; presumably
            ``readHTML`` copes with that - confirm.

    Returns:
        A list of dicts with the keys ``'heading'``, ``'upvotes'`` (a
        two-item list built from the review's pair of upvote entries),
        ``'stars'`` (first space-separated token of the star title) and
        ``'text'``. Reviews of all sites are concatenated; the list is empty
        when no site yields a product or review link.

    Raises:
        IndexError: If a page yields fewer texts or star titles than
            headings, or fewer than two upvote entries per heading.
        ValueError: If a percentage upvote entry or its companion entry is
            not a valid integer.
    """
    # Sentences mentioning the shop or the shipping are dropped from the
    # review text; the match is a case-sensitive substring test.
    blockedWords = ('Flipkart', 'flipkart', 'delivered', 'delivery')

    # Created before the loop so that an empty site list returns [] instead
    # of failing on an unbound name, and so that a later site does not
    # discard the reviews gathered from an earlier one.
    reviews = []

    for site in xmlToDict.readXML('flipkart'):
        prefix = site['prefix']

        searchPage = readHTML(site['searchURL'] + product)
        productLinks = getLinkFromHTML(site['searchTags'], searchPage)
        if not productLinks:
            # The search produced no product link: nothing to scrape here.
            continue

        productPage = readHTML(prefix + productLinks[0])
        if site['allReviewTags']:
            reviewsLink = getLinkFromHTML(site['allReviewTags'], productPage)
        else:
            reviewsLink = productLinks
        if not reviewsLink:
            continue

        reviewPage = readHTML(prefix + reviewsLink[0])
        headings = getTagFromHTML(site['reviewHeading'], reviewPage)
        texts = getTagFromHTML(site['reviewText'], reviewPage)
        upvotes = getTagFromHTML(site['reviewUpvote'], reviewPage)
        stars = getTitleFromHTML(site['reviewStarRating'], reviewPage)

        # Only a single follow-up page is read.
        nextPage = getLinkFromHTML(site['reviewNextPage'], reviewPage)
        if nextPage:
            reviewPage = readHTML(prefix + nextPage[0])
            headings += getTagFromHTML(site['reviewHeading'], reviewPage)
            texts += getTagFromHTML(site['reviewText'], reviewPage)
            upvotes += getTagFromHTML(site['reviewUpvote'], reviewPage)
            stars += getTitleFromHTML(site['reviewStarRating'], reviewPage)

        for index, heading in enumerate(headings):
            # Each review owns two consecutive upvote entries.
            helpful = upvotes[2 * index]
            total = upvotes[2 * index + 1]

            percentAt = helpful.find('%')
            if percentAt >= 0:
                # The first entry is a percentage of the second one: convert
                # it to a count. Integer arithmetic (rounding half up) keeps
                # the result a whole number on both Python 2 and Python 3
                # instead of truncating or producing a float such as '0.99'.
                helpful = (int(helpful[:percentAt]) * int(total) + 50) // 100

            keptSentences = [
                sentence
                for sentence in texts[index].split('.')
                if not any(word in sentence for word in blockedWords)
            ]
            reviews.append({
                'heading': heading,
                'upvotes': [str(helpful), total],
                'stars': stars[index].split(' ')[0],
                # Every kept fragment gets '. ' appended, exactly as before.
                'text': ''.join(sentence + '. ' for sentence in keptSentences),
            })

    return reviews