def scrape_yelp_reviews():
    """Scrape Yelp reviews for every URL listed in restaurant_urls.txt.

    Reads one restaurant URL per line from 'restaurant_urls.txt' and writes
    each scraped review to 'restaurant_reviews.txt' (ASCII-only, separated
    by blank lines), sleeping 0.5s between reviews to rate-limit requests.
    """
    with open('restaurant_urls.txt', 'r') as f:
        urls = f.readlines()
    # No explicit close() needed: the with-statement already closes both
    # files (the original called f.close() redundantly inside the blocks).
    with open('restaurant_reviews.txt', 'w') as out:
        for url in urls:
            # rstrip('\n') instead of url[:-1]: the final line may lack a
            # trailing newline, and [:-1] would chop the URL's last char.
            url = url.rstrip('\n')
            print('Scraping: "%s"' % url)
            for review in scraper.get_reviews(url):
                # Drop non-ASCII characters so the output stays plain ASCII.
                out.write(review.encode('ascii', 'ignore'))
                out.write('\n\n')
                time.sleep(0.5)  # be polite to the server between reviews
            print('\tDone')
def update_keypoints_from_url(url):
    """Scrape reviews for *url*, summarize them, and persist the result.

    url -- a parsed URL (urlparse-style object); url.netloc selects which
           site-specific scraper to use.

    Stores the summarized keypoints — or a user-visible error message —
    via getreview.Review.create_or_update_review. Never raises: the whole
    body is guarded so background-task failures are only logged.
    """
    # TODO: only update review if it's old
    product_url = url.netloc + url.path
    try:
        user_error = None
        keypoints = []
        # --- Get reviews from the scraper matching the site ---
        try:
            if url.netloc == 'www.yelp.com':
                logging.info("url.netloc == 'www.yelp.com'")
                name, reviews = yelpreviewscraper.get_reviews(product_url)
            elif url.netloc == 'www.metacritic.com':
                logging.info("url.netloc == 'www.metacritic.com'")
                name, reviews = metacriticscraper.get_reviews(product_url)
            else:
                # Unsupported site. The original left `reviews` unbound here
                # and relied on the resulting NameError to hit the except
                # block; set it explicitly instead.
                reviews = None
            if not reviews:
                raise ValueError('No reviews returned')  # To force except block
        except Exception:
            # except Exception, not bare except: let KeyboardInterrupt /
            # SystemExit propagate instead of being swallowed.
            logging.error(traceback.format_exc())
            # Set error for user to see
            name = ''
            user_error = 'Cannot find reviews on page.'
        # --- Create keypoints (only if scraping succeeded) ---
        if not user_error:
            try:
                keypoints = summarize.get_keypoints(reviews, name)
            except optimize.NotEnoughSentencesError:
                user_error = 'Not enough reviews to summarize.'
        # Store keypoints, or error; keypoints stays [] when error is set.
        getreview.Review.create_or_update_review(name, url, keypoints,
                                                 error=user_error)
    except Exception:
        # Last-resort guard: this runs as a background task, so log and
        # return instead of raising. (The original's `as error` also
        # shadowed the inner error variable; the binding was unused.)
        logging.error(
            'Failure in update keypoints task for url: {}'.format(product_url))
        logging.error(traceback.format_exc())
    return
def update_keypoints_from_url(url):
    """Scrape reviews for *url*, summarize them, and persist the result.

    NOTE(review): this is a duplicate definition — an identical function
    appears earlier in the file; at import time this later definition wins.
    Consider deleting one copy.

    url -- a parsed URL (urlparse-style object); url.netloc selects which
           site-specific scraper to use.

    Never raises: failures are logged and an error message is stored for
    the user instead.
    """
    # TODO: only update review if it's old
    product_url = url.netloc + url.path
    try:
        user_error = None
        keypoints = []
        # Get reviews from the site-appropriate scraper.
        try:
            if url.netloc == 'www.yelp.com':
                logging.info("url.netloc == 'www.yelp.com'")
                name, reviews = yelpreviewscraper.get_reviews(product_url)
            elif url.netloc == 'www.metacritic.com':
                logging.info("url.netloc == 'www.metacritic.com'")
                name, reviews = metacriticscraper.get_reviews(product_url)
            else:
                # Explicitly handle unsupported sites; the original left
                # `reviews` unbound and relied on NameError for control flow.
                reviews = None
            if not reviews:
                raise ValueError('No reviews returned')  # To force except block
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate. Log traceback for debugging.
            logging.error(traceback.format_exc())
            # Set error for user to see
            name = ''
            user_error = 'Cannot find reviews on page.'
        # Create keypoints only when scraping succeeded.
        if not user_error:
            try:
                keypoints = summarize.get_keypoints(reviews, name)
            except optimize.NotEnoughSentencesError:
                user_error = 'Not enough reviews to summarize.'
        # Store keypoints, or error; keypoints will be [] if error is set.
        getreview.Review.create_or_update_review(name, url, keypoints,
                                                 error=user_error)
    except Exception:
        # Background-task guard: never let an exception escape.
        logging.error(
            'Failure in update keypoints task for url: {}'.format(product_url))
        logging.error(traceback.format_exc())
    return
"""Write output to local hardrive. This will not function on a server. """ output_file = open(output_file_name, 'w') if overall_sentiment != None: output_file.write('Overall Sentiment: {}\n'.format(overall_sentiment)) for data_point in sentence_dataset: output_file.write('{} ({})\n'.format(data_point['sentence'], data_point['sentiment'])) output_file.close() if __name__ == '__main__': text = yelpreviewscraper.get_reviews('http://www.yelp.com/biz/slainte-irish-pub-new-bedford') sentences = text_to_sentences(text) for sentence in sentences: print sentence print '-----------------------' overall_sentiment, sentence_dataset = get_sentiments(text) #write_output(sentence_dataset, overall_sentiment) culled_dataset = cull_sentences(sentence_dataset) #write_output(culled_dataset, output_file_name = 'culled_sentiments.txt') key_points = optimize_keypoints(culled_dataset, overall_sentiment) print key_points for key_point in key_points:
This will not function on a server. """ output_file = open(output_file_name, 'w') if overall_sentiment != None: output_file.write('Overall Sentiment: {}\n'.format(overall_sentiment)) for data_point in sentence_dataset: output_file.write('{} ({})\n'.format(data_point['sentence'], data_point['sentiment'])) output_file.close() if __name__ == '__main__': text = yelpreviewscraper.get_reviews( 'http://www.yelp.com/biz/slainte-irish-pub-new-bedford') sentences = text_to_sentences(text) for sentence in sentences: print sentence print '-----------------------' overall_sentiment, sentence_dataset = get_sentiments(text) #write_output(sentence_dataset, overall_sentiment) culled_dataset = cull_sentences(sentence_dataset) #write_output(culled_dataset, output_file_name = 'culled_sentiments.txt') key_points = optimize_keypoints(culled_dataset, overall_sentiment) print key_points for key_point in key_points: