def _amazonFilterText(text):
    """Return `text` without the sentences that mention Amazon or delivery.

    The text is split on '.'; a fragment is dropped when its lower-cased
    form contains 'amazon', 'delivered' or 'delivery'.  Every fragment that
    is kept gets its '.' terminator back.
    """
    blockedWords = ('amazon', 'delivered', 'delivery')
    keptSentences = []
    for sentence in text.split('.'):
        # Blank fragments come from a trailing or doubled '.'; re-terminating
        # them would emit a spurious extra period.
        if not sentence.strip():
            continue
        # Compare case-insensitively so sentence-initial "Delivery ..." or
        # "AMAZON" are filtered as well.
        lowered = sentence.lower()
        if any(word in lowered for word in blockedWords):
            continue
        keptSentences.append(sentence + '.')
    return ''.join(keptSentences)


def _amazonBuildReviews(headings, texts, upvotes, stars):
    """Combine the parallel scraped lists into a list of review dicts.

    zip() stops at the shortest list, so a page where one field was scraped
    fewer times than the headings yields fewer reviews instead of raising
    IndexError.
    """
    reviews = []
    for heading, text, upvote, star in zip(headings, texts, upvotes, stars):
        # Words 0 and 2 are taken as the helpful and total vote counts;
        # presumably the text looks like "<helpful> of <total> ..." --
        # TODO confirm against the live markup.
        voteWords = upvote.split(' ')
        reviews.append({
            'heading': heading,
            'upvotes': [voteWords[0], voteWords[2]],
            'stars': star.split(' ')[0],
            'text': _amazonFilterText(text),
        })
    return reviews


def mainFunction(product):
    """Scrape reviews for `product` from the sites configured under 'amazon'.

    `product` is appended verbatim to each site's search URL, e.g.
    'Dell Inspiron 15 3521 Laptop (3rd Gen Ci3/ 4GB/ 500GB/ Win8)'.  The
    first search hit is opened, its review page is read, and up to two
    further review pages are followed.

    Returns a list of dicts with the keys 'heading', 'upvotes'
    ([helpful, total]), 'stars' and 'text'.  Reviews of all configured
    sites are concatenated; the list is empty when there is no site entry
    or nothing was found.
    """
    maxExtraPages = 2  # review pages followed after the first one
    reviews = []
    for site in xmlToDict.readXML('amazon'):
        searchPage = readHTML(site['searchURL'] + product)
        productLinks = getLinkFromHTML(site['searchTags'], searchPage)
        if not productLinks:
            # No search hit on this site: nothing to scrape.
            continue
        productPage = readHTML(site['prefix'] + productLinks[0])

        if site['allReviewTags']:
            reviewsLink = getLinkFromHTML(site['allReviewTags'], productPage)
            if not reviewsLink:
                continue
            reviewPage = readHTML(site['prefix'] + reviewsLink[0])
        else:
            # Without an "all reviews" selector the review URL is the product
            # URL itself, so reuse the page instead of downloading it again.
            reviewPage = productPage

        reviewsHeading, reviewsText = [], []
        reviewsUpvotes, reviewStars = [], []
        extraPages = 0
        while True:
            reviewsHeading += getTagFromHTML(site['reviewHeading'], reviewPage)
            reviewsText += getTagFromHTML(site['reviewText'], reviewPage)
            reviewsUpvotes += getTagFromHTML(site['reviewUpvote'], reviewPage)
            reviewStars += getTagFromHTML(site['reviewStarRating'], reviewPage)

            nextPage = getLinkFromHTML(site['reviewNextPage'], reviewPage)
            # An empty result means this was the last page; popping from it
            # (as the old code did) would raise IndexError.
            if not nextPage or extraPages >= maxExtraPages:
                break
            # The last matching link is the one that is followed.
            reviewPage = readHTML(site['prefix'] + nextPage[-1])
            extraPages += 1

        reviews += _amazonBuildReviews(
            reviewsHeading, reviewsText, reviewsUpvotes, reviewStars)
    return reviews
def _flipkartFilterText(text):
    """Return `text` without the sentences that mention Flipkart or delivery.

    The text is split on '.'; a fragment is dropped when its lower-cased
    form contains 'flipkart', 'delivered' or 'delivery'.  Every fragment
    that is kept is followed by '. '.
    """
    blockedWords = ('flipkart', 'delivered', 'delivery')
    keptSentences = []
    for sentence in text.split('.'):
        # Blank fragments come from a trailing or doubled '.'; re-terminating
        # them would emit a spurious extra period.
        if not sentence.strip():
            continue
        # Compare case-insensitively so sentence-initial "Delivery ..." or
        # "FLIPKART" are filtered as well.
        lowered = sentence.lower()
        if any(word in lowered for word in blockedWords):
            continue
        keptSentences.append(sentence + '. ')
    return ''.join(keptSentences)


def _flipkartHelpfulCount(helpful, total):
    """Return the helpful-vote figure as a string.

    When `helpful` contains '%', the part before it is read as a percentage
    of `total` and converted to a vote count, rounded to the nearest
    integer.  Otherwise `helpful` is returned unchanged (as str).
    """
    percentAt = helpful.find('%')
    if percentAt < 0:
        return str(helpful)
    percent = int(helpful[:percentAt])
    # Integer round-to-nearest: gives the same whole number on Python 2 and
    # Python 3 (plain '/' yields a float such as '5.0' on Python 3) and maps
    # e.g. 33% of 3 to 1 rather than truncating to 0.
    return str((percent * int(total) + 50) // 100)


def _flipkartBuildReviews(headings, texts, upvotes, stars):
    """Combine the parallel scraped lists into a list of review dicts.

    `upvotes` holds two consecutive entries per review: the helpful figure
    followed by the total.  zip() stops at the shortest input, so missing
    fields shorten the result instead of raising IndexError.
    """
    reviews = []
    votePairs = zip(upvotes[0::2], upvotes[1::2])
    for heading, text, votes, star in zip(headings, texts, votePairs, stars):
        helpful, total = votes
        reviews.append({
            'heading': heading,
            'upvotes': [_flipkartHelpfulCount(helpful, total), total],
            'stars': star.split(' ')[0],
            'text': _flipkartFilterText(text),
        })
    return reviews


def mainFunction(product):
    """Scrape reviews for `product` from the sites configured under 'flipkart'.

    `product` is appended verbatim to each site's search URL, e.g.
    'Dell Inspiron 15 3521 Laptop (3rd Gen Ci3/ 4GB/ 500GB/ Win8)'.  The
    first search hit is opened, its review page is read, and at most one
    further review page is followed.

    Returns a list of dicts with the keys 'heading', 'upvotes'
    ([helpful, total]), 'stars' and 'text'.  Reviews of all configured
    sites are concatenated; the list is empty when there is no site entry
    or nothing was found.
    """
    reviews = []
    for site in xmlToDict.readXML('flipkart'):
        searchPage = readHTML(site['searchURL'] + product)
        productLinks = getLinkFromHTML(site['searchTags'], searchPage)
        if not productLinks:
            # No search hit on this site: nothing to scrape.
            continue
        productPage = readHTML(site['prefix'] + productLinks[0])

        if site['allReviewTags']:
            reviewsLink = getLinkFromHTML(site['allReviewTags'], productPage)
            if not reviewsLink:
                continue
            reviewPage = readHTML(site['prefix'] + reviewsLink[0])
        else:
            # Without an "all reviews" selector the review URL is the product
            # URL itself, so reuse the page instead of downloading it again.
            reviewPage = productPage

        reviewsHeading = getTagFromHTML(site['reviewHeading'], reviewPage)
        reviewsText = getTagFromHTML(site['reviewText'], reviewPage)
        reviewsUpvotes = getTagFromHTML(site['reviewUpvote'], reviewPage)
        # Star ratings are read through getTitleFromHTML, unlike the other
        # fields.
        reviewStars = getTitleFromHTML(site['reviewStarRating'], reviewPage)

        nextPage = getLinkFromHTML(site['reviewNextPage'], reviewPage)
        if nextPage:
            # Only the first "next page" link is followed, once.
            reviewPage = readHTML(site['prefix'] + nextPage[0])
            reviewsHeading += getTagFromHTML(site['reviewHeading'], reviewPage)
            reviewsText += getTagFromHTML(site['reviewText'], reviewPage)
            reviewsUpvotes += getTagFromHTML(site['reviewUpvote'], reviewPage)
            reviewStars += getTitleFromHTML(
                site['reviewStarRating'], reviewPage)

        reviews += _flipkartBuildReviews(
            reviewsHeading, reviewsText, reviewsUpvotes, reviewStars)
    return reviews