def test_get_talks_for_search_smoke(self):
        scraper = Search(test_util.get_HTML)
        talks_generator = scraper.get_talks_for_search('nuclear', 1)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()

        self.assertLess(
            50, remaining_talks
        )  # 85 remaining at last check, just make sure we have a decent number remaining.

        talks = list(talks_generator)
        talks_per_page = 12.0
        self.assertEqual(talks_per_page, len(talks))
        # Actually no quarantee this talk is in top ten results. It is today.
        sample_talk = [
            s for s in talks if s[0] ==
            'Taylor Wilson: My radical plan for small nuclear fission reactors'
        ][0]
        self.assertEqual(
            'http://www.ted.com/talks/taylor_wilson_my_radical_plan_for_small_nuclear_fission_reactors',
            sample_talk[1])
        self.assertEqual(
            'http://images.ted.com/images/ted/a39d3332aa3d0cabe2e4c20ffb1a3934c012ee8b_240x180.jpg',
            sample_talk[2])

        last_page = 1 + int(math.ceil(remaining_talks / talks_per_page))
        talks_generator = scraper.get_talks_for_search('nuclear', last_page)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()
        self.assertEqual(0, remaining_talks)
        self.assertLess(0, len(talks))
Exemplo n.º 2
0
    def test_get_talks_for_search_smoke(self):
        scraper = Search(test_util.get_HTML)
        talks_generator = scraper.get_talks_for_search('nuclear', 1)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()

        self.assertLess(
            100, remaining_talks
        )  # 111 remaining at last check, just make sure we have a decent number remaining.

        talks = list(talks_generator)
        talks_per_page = 12.0
        self.assertEqual(talks_per_page, len(talks))
        # Actually no quarantee this talk is in top ten results. It is today.
        sample_talk = [
            s for s in talks
            if s[0] == 'Taylor Wilson: Yup, I built a nuclear fusion reactor'
        ][0]
        self.assertEqual(
            'http://www.ted.com/talks/taylor_wilson_yup_i_built_a_nuclear_fusion_reactor',
            sample_talk[1])
        self.assertEqual(
            'https://pi.tedcdn.com/r/pe.tedcdn.com/images/ted/fa41dca52e81265b6e20f7ad9647711c1a58efb7_1600x1200.jpg?',
            sample_talk[2])

        last_page = 1 + int(math.ceil(remaining_talks / talks_per_page))
        talks_generator = scraper.get_talks_for_search('nuclear', last_page)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()
        self.assertEqual(0, remaining_talks)
        self.assertLess(0, len(talks))
Exemplo n.º 3
0
 def test_get_talks_for_search_decodes_entities(self):
     scraper = Search(test_util.get_HTML)
     talks_generator = scraper.get_talks_for_search('Onora', 1)
     timeit.itertools.islice(talks_generator, 1).next()
     self.assertTrue("Onora O'Neill: What we don't understand about trust"
                     in [t[0] for t in list(talks_generator)
                         ])  # "'" is encoded so we need to decode it
Exemplo n.º 4
0
 def test_search_for_speaker_name(self):
     scraper = Search(test_util.get_HTML)
     talks_generator = scraper.get_talks_for_search(
         'Christopher Soghoian', 1)  # Random name I haven't heard of
     self.assertEqual(
         0,
         timeit.itertools.islice(
             talks_generator,
             1).next())  # Should be uncommon enough to have all results
     list(
         talks_generator
     )  # We can generate a talk from each result - no contaminating "profile" results or similar
Exemplo n.º 5
0
class TestSearchScraper(unittest.TestCase):
    def setUp(self):
        self.sut = Search(CachedHTMLProvider().get_HTML)

    def test_get_talks_for_search_smoke(self):
        talks_generator = self.sut.get_talks_for_search('nuclear', 1)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()

        self.assertLess(
            100, remaining_talks
        )  # 111 remaining at last check, just make sure we have a decent number remaining.

        talks = list(talks_generator)
        talks_per_page = 12.0
        self.assertEqual(talks_per_page, len(talks))
        # Actually no quarantee this talk is in top ten results. It is today.
        sample_talk = [
            s for s in talks
            if s[0] == 'Taylor Wilson: Yup, I built a nuclear fusion reactor'
        ][0]
        self.assertEqual(
            'http://www.ted.com/talks/taylor_wilson_yup_i_built_a_nuclear_fusion_reactor',
            sample_talk[1])
        self.assertEqual(
            'https://pi.tedcdn.com/r/pe.tedcdn.com/images/ted/fa41dca52e81265b6e20f7ad9647711c1a58efb7_1600x1200.jpg?',
            sample_talk[2])

        last_page = 1 + int(math.ceil(remaining_talks / talks_per_page))
        talks_generator = self.sut.get_talks_for_search('nuclear', last_page)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()
        self.assertEqual(0, remaining_talks)
        self.assertLess(0, len(talks))

    @skip_ted_rate_limited
    def test_get_talks_for_search_decodes_entities(self):
        talks_generator = self.sut.get_talks_for_search('Onora', 1)
        timeit.itertools.islice(talks_generator, 1).next()
        self.assertTrue("Onora O'Neill: What we don't understand about trust"
                        in [t[0] for t in list(talks_generator)
                            ])  # "'" is encoded so we need to decode it

    def test_search_for_speaker_name(self):
        talks_generator = self.sut.get_talks_for_search(
            'Christopher Soghoian', 1)  # Random name I haven't heard of
        self.assertEqual(
            0,
            timeit.itertools.islice(
                talks_generator,
                1).next())  # Should be uncommon enough to have all results
        list(
            talks_generator
        )  # We can generate a talk from each result - no contaminating "profile" results or similar
    def test_get_talks_for_search_smoke(self):
        scraper = Search(test_util.get_HTML)
        talks_generator = scraper.get_talks_for_search('nuclear', 1)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()

        self.assertLess(50, remaining_talks)  # 85 remaining at last check, just make sure we have a decent number remaining.

        talks = list(talks_generator)
        talks_per_page = 12.0
        self.assertEqual(talks_per_page, len(talks))
        # Actually no quarantee this talk is in top ten results. It is today.
        sample_talk = [s for s in talks if s[0] == 'Taylor Wilson: My radical plan for small nuclear fission reactors'][0]
        self.assertEqual('http://www.ted.com/talks/taylor_wilson_my_radical_plan_for_small_nuclear_fission_reactors', sample_talk[1])
        self.assertEqual('http://img.tedcdn.com/r/images.ted.com/images/ted/a39d3332aa3d0cabe2e4c20ffb1a3934c012ee8b_1600x1200.jpg?', sample_talk[2])

        last_page = 1 + int(math.ceil(remaining_talks / talks_per_page))
        talks_generator = scraper.get_talks_for_search('nuclear', last_page)
        remaining_talks = timeit.itertools.islice(talks_generator, 1).next()
        self.assertEqual(0, remaining_talks)
        self.assertLess(0, len(talks))
 def test_search_for_speaker_name(self):
     scraper = Search(test_util.get_HTML)
     talks_generator = scraper.get_talks_for_search('Christopher Soghoian', 1)  # Random name I haven't heard of
     self.assertEqual(0, timeit.itertools.islice(talks_generator, 1).next())  # Should be uncommon enough to have all results
     list(talks_generator)  # We can generate a talk from each result - no contaminating "profile" results or similar
 def test_get_talks_for_search_decodes_entities(self):
     scraper = Search(test_util.get_HTML)
     talks_generator = scraper.get_talks_for_search('Onora', 1)
     timeit.itertools.islice(talks_generator, 1).next()
     self.assertTrue("Onora O'Neill: What we don't understand about trust" in [t[0] for t in list(talks_generator)])  # "'" is encoded so we need to decode it
Exemplo n.º 9
0
 def setUp(self):
     self.sut = Search(CachedHTMLProvider().get_HTML)