예제 #1
0
def scrape_yelp_reviews():
    """Scrape reviews for every restaurant listed in restaurant_urls.txt.

    Reads one URL per line from ``restaurant_urls.txt`` and writes each
    page's reviews to ``restaurant_reviews.txt``, separating the output
    for consecutive URLs with a blank line.

    Side effects: reads/writes the two files above, prints progress to
    stdout, and sleeps 0.5 s between requests to be polite to the server.
    """
    with open('restaurant_urls.txt', 'r') as f:
        urls = f.readlines()
        # No explicit f.close(): the with-statement closes the file.

    with open('restaurant_reviews.txt', 'w') as out:
        for url in urls:
            # rstrip('\n') instead of url[:-1]: the last line of the
            # input file may lack a trailing newline, and [:-1] would
            # then chop a real character off the URL.
            url = url.rstrip('\n')
            print('Scraping: "%s"' % url)
            for review in scraper.get_reviews(url):
                # Drop non-ASCII characters rather than crash on them.
                out.write(review.encode('ascii', 'ignore'))
            out.write('\n\n')
            # Throttle requests so we don't hammer the site.
            time.sleep(0.5)
            print('\tDone')
예제 #2
0
def update_keypoints_from_url(url):
    """Scrape reviews for *url*, summarize them, and persist the result.

    Args:
        url: A parsed URL (urlparse-result-like object) exposing
            ``netloc`` and ``path``. Only www.yelp.com and
            www.metacritic.com are supported; any other host yields the
            'Cannot find reviews on page.' error stored on the record.

    Never raises: any failure is logged and either stored on the review
    record (scrape/summarize errors) or swallowed (storage errors).
    """
    # TODO: only update review if it's old
    product_url = url.netloc + url.path
    try:
        error = None
        keypoints = []
        # Initialize so an unsupported netloc falls through to the
        # "no reviews" path instead of raising NameError below.
        name = ''
        reviews = None
        # Get reviews from the scraper matching the site.
        try:
            if url.netloc == 'www.yelp.com':
                logging.info("url.netloc == 'www.yelp.com'")
                name, reviews = yelpreviewscraper.get_reviews(product_url)
            elif url.netloc == 'www.metacritic.com':
                logging.info("url.netloc == 'www.metacritic.com'")
                name, reviews = metacriticscraper.get_reviews(product_url)

            if not reviews:
                raise ValueError(
                    'No reviews returned')  # To force except block
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate. Log traceback for debugging.
            logging.error(traceback.format_exc())

            # Set error for user to see
            name = ''
            error = 'Cannot find reviews on page.'

        # Create keypoints
        if not error:
            try:
                keypoints = summarize.get_keypoints(reviews, name)
            except optimize.NotEnoughSentencesError:
                error = 'Not enough reviews to summarize.'

        # Store keypoints, or error
        # keypoints will be [] if error is set
        getreview.Review.create_or_update_review(name,
                                                 url,
                                                 keypoints,
                                                 error=error)
    except Exception:
        # Don't bind the exception as ``error``: that would shadow the
        # local error-message string defined above.
        logging.error(
            'Failure in update keypoints task for url: {}'.format(product_url))
        logging.error(traceback.format_exc())
        return
예제 #3
0
def update_keypoints_from_url(url):
    """Scrape reviews for *url*, summarize them, and persist the result.

    Args:
        url: A parsed URL (urlparse-result-like object) exposing
            ``netloc`` and ``path``. Only www.yelp.com and
            www.metacritic.com are supported; any other host yields the
            'Cannot find reviews on page.' error stored on the record.

    Never raises: failures are logged and either stored on the review
    record (scrape/summarize errors) or swallowed (storage errors).
    """
    # TODO: only update review if it's old
    product_url = url.netloc + url.path
    try:
        error = None
        keypoints = []
        # Initialize so an unsupported netloc falls through to the
        # "no reviews" path instead of raising NameError below.
        name = ''
        reviews = None
        # Get reviews from the scraper matching the site.
        try:
            if url.netloc == 'www.yelp.com':
                logging.info("url.netloc == 'www.yelp.com'")
                name, reviews = yelpreviewscraper.get_reviews(product_url)
            elif url.netloc == 'www.metacritic.com':
                logging.info("url.netloc == 'www.metacritic.com'")
                name, reviews = metacriticscraper.get_reviews(product_url)

            if not reviews:
                raise ValueError('No reviews returned') # To force except block
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate. Log traceback for debugging.
            logging.error(traceback.format_exc())

            # Set error for user to see
            name = ''
            error = 'Cannot find reviews on page.'

        # Create keypoints
        if not error:
            try:
                keypoints = summarize.get_keypoints(reviews, name)
            except optimize.NotEnoughSentencesError:
                error = 'Not enough reviews to summarize.'

        # Store keypoints, or error
        # keypoints will be [] if error is set
        getreview.Review.create_or_update_review(name, url, keypoints, error=error)
    except Exception:
        # Don't bind the exception as ``error``: that would shadow the
        # local error-message string defined above.
        logging.error('Failure in update keypoints task for url: {}'.format(product_url))
        logging.error(traceback.format_exc())
        return
예제 #4
0
    """Write output to local hardrive.

    This will not function on a server.
    """
    output_file = open(output_file_name, 'w')

    if overall_sentiment != None:
        output_file.write('Overall Sentiment: {}\n'.format(overall_sentiment))
    for data_point in sentence_dataset:
        output_file.write('{} ({})\n'.format(data_point['sentence'], data_point['sentiment']))

    output_file.close()


if __name__ == '__main__':
    text = yelpreviewscraper.get_reviews('http://www.yelp.com/biz/slainte-irish-pub-new-bedford')

    sentences = text_to_sentences(text)
    for sentence in sentences:
        print sentence
        print '-----------------------'

    overall_sentiment, sentence_dataset = get_sentiments(text)
    #write_output(sentence_dataset, overall_sentiment)

    culled_dataset = cull_sentences(sentence_dataset)
    #write_output(culled_dataset, output_file_name = 'culled_sentiments.txt')

    key_points = optimize_keypoints(culled_dataset, overall_sentiment)
    print key_points
    for key_point in key_points:
예제 #5
0
    This will not function on a server.
    """
    output_file = open(output_file_name, 'w')

    if overall_sentiment != None:
        output_file.write('Overall Sentiment: {}\n'.format(overall_sentiment))
    for data_point in sentence_dataset:
        output_file.write('{} ({})\n'.format(data_point['sentence'],
                                             data_point['sentiment']))

    output_file.close()


if __name__ == '__main__':
    text = yelpreviewscraper.get_reviews(
        'http://www.yelp.com/biz/slainte-irish-pub-new-bedford')

    sentences = text_to_sentences(text)
    for sentence in sentences:
        print sentence
        print '-----------------------'

    overall_sentiment, sentence_dataset = get_sentiments(text)
    #write_output(sentence_dataset, overall_sentiment)

    culled_dataset = cull_sentences(sentence_dataset)
    #write_output(culled_dataset, output_file_name = 'culled_sentiments.txt')

    key_points = optimize_keypoints(culled_dataset, overall_sentiment)
    print key_points
    for key_point in key_points: