Exemplo n.º 1
0
    return starrating


def grab_reviews(pagetext):
    """Return the review bodies found in the HTML of a review page.

    Args:
        pagetext: HTML source of the page, as a string.

    Returns:
        A list of strings, one per ``review-text`` span matched, in
        document order; an empty list when nothing matches. The text is
        returned exactly as it appears in the HTML, so inline markup such
        as ``<br />`` is still present.
    """
    # Non-greedy capture: with a greedy ``.+`` several reviews that sit on
    # the same line of HTML collapse into a single oversized match.
    # ``.`` still does not cross newlines, so a match never spans lines.
    reviews = re.findall(
        r'<span class="a-size-base review-text">(.+?)</span></div><div',
        pagetext)
    return reviews


# NOTE(review): all_ratings is initialised here but nothing in the code
# below appends to it or reads it.
all_ratings = []

####################### Execution part #######################
# Fetch every entry of `amazon` (defined elsewhere; presumably a list of
# review-page URLs -- confirm) and print each review found on the page.
for i in amazon:
    try:
        page_contents = graburlcontent(i)
        #print(page_contents)

        reviewer_names = grab_reviewer_name(page_contents)
        # The first two star-rating matches are discarded -- presumably
        # page-level summary ratings rather than per-review ones; verify
        # against the live HTML.
        star_ratings = grab_star_rating(page_contents)[2:]
        reviews = grab_reviews(page_contents)

        # `x` is a hand-maintained index that runs in lockstep with the
        # loop below.
        # NOTE(review): the inner loop variable is also called `i`, which
        # rebinds the outer loop's `i` while a page is being processed.
        x = 0
        for i in range(x, len(reviews)):
            # reviewer_names and star_ratings are indexed with the review
            # index; if either list is shorter than `reviews` this raises
            # IndexError inside the enclosing try.
            print('Name: ', reviewer_names[x].replace('&amp;', 'and'), '\n')
            print('Stars they gave to the product: ', star_ratings[x], '\n')
            # Paragraph breaks in the review HTML are flattened to spaces.
            print('Review: ', reviews[x].replace('<br /><br />', ' '), '\n')
            print('\n' * 2)

            x += 1
Exemplo n.º 2
0
def grab_us_rank(pagetext):
    """Return the United States rank figures found in the page HTML.

    Args:
        pagetext: HTML source of the page, as a string.

    Returns:
        A list of strings made of digits and commas (for example
        ``'1,234'``), one per match of the "United States Flag" metrics
        markup, in document order; an empty list when nothing matches.
    """
    # Raw strings keep ``\s`` and ``\d`` as regex escapes; in a normal
    # string literal they are invalid escape sequences and trigger a
    # SyntaxWarning on recent Python versions.
    usrank = re.findall(
        r"alt='United States Flag'>"
        r'<strong class="metrics-data\salign-vmiddle">\s([\d,]+)',
        pagetext)
    return usrank

def grab_global_rank(pagetext):
    """Return the global rank figures found in the page HTML.

    The rank is located by the HTML comment that precedes it in the page
    markup.

    Args:
        pagetext: HTML source of the page, as a string.

    Returns:
        A list of strings made of digits and commas (for example
        ``'5,678'``), one per match, in document order; an empty list when
        nothing matches.
    """
    # Raw strings keep ``\s`` and ``\d`` as regex escapes; in a normal
    # string literal they are invalid escape sequences and trigger a
    # SyntaxWarning on recent Python versions.
    global_rank = re.findall(
        r'<!-- Alexa web traffic metrics are available via our API at '
        r'http://aws.amazon.com/awis -->\s([\d,]+)',
        pagetext)
    return global_rank



####################### Execution part #######################
# Header row of the tab-separated table printed by the loop below.
print('Website\tLocal Rank\tGlobal Rank')
# `sites` and `alexa` are defined elsewhere; each entry of `sites` is
# appended to `alexa` to form the address that is fetched (presumably a
# base URL plus a domain name -- confirm).
for i in sites:
    try:
        page_contents = graburlcontent(alexa + i)
        #print(page_contents)
        usrank = grab_us_rank(page_contents)
        global_rank = grab_global_rank(page_contents)
        
        # Both helpers return re.findall lists; [0] takes the first match
        # and raises IndexError inside this try when the list is empty.
        print(i, '\t', usrank[0], '\t', global_rank[0])
        # Pause four seconds before the next site -- presumably to
        # throttle requests; confirm the intended rate.
        time.sleep(4)
        
       
        """
        reviewer_names = grab_reviewer_name(page_contents)
        star_ratings = grab_star_rating(page_contents)[2:]
        reviews = grab_reviews(page_contents)
        
        x = 0
        for i in range(x, len(reviews)):
Exemplo n.º 3
0
    return reviewername

def grab_star_rating(pagetext):
    """Return the star-rating labels found in the page HTML.

    Args:
        pagetext: HTML source of the page, as a string.

    Returns:
        A list of strings, one per ``a-icon-alt`` span matched, in
        document order; an empty list when nothing matches.
    """
    # Non-greedy capture: with a greedy ``.+`` several ratings that sit on
    # the same line of HTML collapse into a single oversized match.
    starrating = re.findall(r'="a-icon-alt">(.+?)</span></i', pagetext)
    return starrating

def grab_reviews(pagetext):
    """Return the review bodies found in the HTML of a review page.

    Args:
        pagetext: HTML source of the page, as a string.

    Returns:
        A list of strings, one per ``review-text`` span matched, in
        document order; an empty list when nothing matches. The text is
        returned exactly as it appears in the HTML, so inline markup such
        as ``<br />`` is still present.
    """
    # Non-greedy capture: with a greedy ``.+`` several reviews that sit on
    # the same line of HTML collapse into a single oversized match.
    reviews = re.findall(r'<span class="a-size-base review-text">(.+?)</span></div><div', pagetext)
    return reviews

# NOTE(review): all_ratings is initialised here but nothing in the code
# below appends to it or reads it.
all_ratings = []

####################### Execution part #######################
# Fetch every entry of `amazon` (defined elsewhere; presumably a list of
# review-page URLs -- confirm) and print each review found on the page.
for i in amazon:
    try:
        page_contents = graburlcontent(i)
        #print(page_contents)
        
       
        
        reviewer_names = grab_reviewer_name(page_contents)
        # The first two star-rating matches are discarded -- presumably
        # page-level summary ratings rather than per-review ones; verify
        # against the live HTML.
        star_ratings = grab_star_rating(page_contents)[2:]
        reviews = grab_reviews(page_contents)
        
        # NOTE(review): the lists are indexed with `x`, not with the loop
        # variable; confirm `x` is advanced later in the loop body. The
        # inner loop variable is also called `i`, which rebinds the outer
        # loop's `i` while a page is being processed.
        x = 0
        for i in range(x, len(reviews)):
            # reviewer_names and star_ratings are indexed with the review
            # index; if either list is shorter than `reviews` this raises
            # IndexError inside the enclosing try.
            print('Name: ', reviewer_names[x].replace('&amp;','and'), '\n')
            print('Stars they gave to the product: ', star_ratings[x], '\n')
            # Paragraph breaks in the review HTML are flattened to spaces.
            print('Review: ', reviews[x].replace('<br /><br />',' '),'\n')
            print('\n'*2)