Пример #1
0
    def test_scrape_spreadsheet(self):
        """
        Make sure we grab the right data from spreadsheets
        """
        scraper = SpreadsheetScraper(self.source)
        stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
        self.assertEqual(len(stories), 4)

        self.assertEqual(stories[0]['date'],
                         '42467')  # Crappy excel date format
        self.assertEqual(stories[0]['graphic_slug'], 'voting-wait-20160404')
        self.assertEqual(stories[0]['graphic_type'], 'Graphic')
        self.assertEqual(
            stories[0]['story_headline'],
            'What Keeps Election Officials Up At Night? Fear Of Long Lines At The Polls'
        )
        self.assertEqual(
            stories[0]['story_url'],
            'http://www.npr.org/2016/04/07/473293026/what-keeps-election-officials-up-at-night-fear-of-long-lines-at-the-polls'
        )
        self.assertEqual(stories[0]['contact'], 'Alyson Hurt')

        self.assertEqual(stories[0]['date'], '42467')
        self.assertEqual(stories[3]['graphic_slug'], 'seed-market-20160405')
        self.assertEqual(stories[3]['graphic_type'], 'Graphic')
        self.assertEqual(
            stories[3]['story_headline'],
            'Big Seed: Consolidation Is Shrinking The Industry Even Further')
        self.assertEqual(
            stories[3]['story_url'],
            'http://www.npr.org/sections/thesalt/2016/04/06/472960018/big-seed-consolidation-is-shrinking-the-industry-even-further'
        )
        self.assertEqual(stories[3]['contact'], 'Alyson Hurt')
Пример #2
0
    def test_write_spreadsheet(self, mock_upload):
        """
        Make sure we save the stories to the database when scraping from a
        spreadsheet
        """
        clear_stories()

        scraper = SpreadsheetScraper(self.source)
        stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

        scraper.write(stories)

        results = Story.select()
        self.assertEqual(len(results), 4)

        for idx, story in enumerate(stories):
            self.assertEqual(results[idx].name, story['story_headline'])
            self.assertEqual(results[idx].url, story['story_url'])
Пример #3
0
    def test_write_spreadsheet(self, mock_upload):
        """
        Make sure we save the stories to the database when scraping from a
        spreadsheet
        """
        clear_stories()

        scraper = SpreadsheetScraper(self.source)
        stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

        scraper.write(stories)

        results = Story.select()
        self.assertEqual(len(results), 4)

        for idx, story in enumerate(stories):
            self.assertEqual(results[idx].name, story['story_headline'])
            self.assertEqual(results[idx].url, story['story_url'])
Пример #4
0
    def test_scrape_spreadsheet(self):
        """
        Make sure we grab the right data from spreadsheets
        """
        scraper = SpreadsheetScraper(self.source)
        stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
        self.assertEqual(len(stories), 4)

        self.assertEqual(stories[0]['date'], '42467') # Crappy excel date format
        self.assertEqual(stories[0]['graphic_slug'], 'voting-wait-20160404')
        self.assertEqual(stories[0]['graphic_type'], 'Graphic')
        self.assertEqual(stories[0]['story_headline'], 'What Keeps Election Officials Up At Night? Fear Of Long Lines At The Polls')
        self.assertEqual(stories[0]['story_url'], 'http://www.npr.org/2016/04/07/473293026/what-keeps-election-officials-up-at-night-fear-of-long-lines-at-the-polls')
        self.assertEqual(stories[0]['contact'], 'Alyson Hurt')

        self.assertEqual(stories[0]['date'], '42467')
        self.assertEqual(stories[3]['graphic_slug'], 'seed-market-20160405')
        self.assertEqual(stories[3]['graphic_type'], 'Graphic')
        self.assertEqual(stories[3]['story_headline'], 'Big Seed: Consolidation Is Shrinking The Industry Even Further')
        self.assertEqual(stories[3]['story_url'], 'http://www.npr.org/sections/thesalt/2016/04/06/472960018/big-seed-consolidation-is-shrinking-the-industry-even-further')
        self.assertEqual(stories[3]['contact'], 'Alyson Hurt')
Пример #5
0
def load_new_stories():
    """
    Goes through the sources you configured in `app_config` and adds any new
    stories to the database.
    """
    for source in app_config.SOURCES:
        if source['type'] == 'spreadsheet':
            stories = SpreadsheetScraper(source).scrape_and_load()

        elif source['type'] == 'rss':
            stories = RSSScraper(source).scrape_and_load()

        if source['type'] == 'pocky':
            stories = PockyScraper(source).scrape_and_load()

        for story in stories:
            slack_tools.send_tracking_started_message(story)
Пример #6
0
    def test_write_spreadsheet_duplicates(self, mock_upload):
        """
        Make sure stories don't get inserted more than once
        """
        mock_upload.return_value = 'http://image-url-here'

        clear_stories()

        scraper = SpreadsheetScraper(self.source)
        stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

        # Insert the stories
        scraper.write(stories)
        results = Story.select()
        self.assertEqual(len(results), 4)

        # Now insert them again and make sure we don't have duplicates
        scraper.write(stories)
        results = Story.select()
        self.assertEqual(len(results), 4)
Пример #7
0
    def test_write_spreadsheet_duplicates(self, mock_upload):
        """
        Make sure stories don't get inserted more than once
        """
        mock_upload.return_value = 'http://image-url-here'

        clear_stories()

        scraper = SpreadsheetScraper(self.source)
        stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

        # Insert the stories
        scraper.write(stories)
        results = Story.select()
        self.assertEqual(len(results), 4)

        # Now insert them again and make sure we don't have duplicates
        scraper.write(stories)
        results = Story.select()
        self.assertEqual(len(results), 4)