def _assert_parse(self, filename, num_sections, num_lectures, num_resources, num_videos):
    """Parse an HTML fixture and check the counts extracted from it.

    ``filename`` is resolved relative to the ``fixtures/html`` directory
    that sits next to this test module.  The remaining arguments are the
    expected totals:

    * ``num_sections`` -- number of items returned by ``parse_syllabus``.
    * ``num_lectures`` -- number of entries found at index 1 of every
      section, summed over all sections.
    * ``num_resources`` -- sum of ``len(value)`` over every key/value pair
      of the mapping found at index 1 of every lecture.
    * ``num_videos`` -- the same sum restricted to the ``"mp4"`` key.
    """
    fixture_path = os.path.join(
        os.path.dirname(__file__), "fixtures", "html", filename)
    with open(fixture_path) as handle:
        page = handle.read()

    parsed = coursera_dl.parse_syllabus(None, page, None)

    # section count
    self.assertEqual(len(parsed), num_sections)

    # lecture count
    all_lectures = []
    for section in parsed:
        all_lectures.extend(section[1])
    self.assertEqual(len(all_lectures), num_lectures)

    # resource count: one (format, number of entries) pair per format
    per_format = []
    for lecture in all_lectures:
        for fmt, entries in iteritems(lecture[1]):
            per_format.append((fmt, len(entries)))
    resource_total = sum(count for _, count in per_format)
    self.assertEqual(resource_total, num_resources)

    # mp4 count
    video_total = sum(count for fmt, count in per_format if fmt == "mp4")
    self.assertEqual(video_total, num_videos)
def test_parse_classes_with_bs4(self):
    """Parse the ``parsing-<course>-with-bs4.html`` fixtures and check counts.

    Each fixture is read from the ``fixtures/html`` directory next to this
    module, passed to ``coursera_dl.parse_syllabus`` and compared against
    the expected number of sections, lectures, resources and mp4
    resources.  The course name is attached to every assertion so that a
    failure identifies the fixture that triggered it.
    """
    # course name -> expected (sections, lectures, resources, mp4s)
    classes = {
        'datasci-001': (10, 97, 358, 97),  # issue 134
        'startup-001': (4, 44, 136, 44),  # issue 137
        'wealthofnations-001': (8, 74, 296, 74),  # issue 131
    }
    # The fixture directory does not change between iterations.
    fixtures_dir = os.path.join(
        os.path.dirname(__file__), "fixtures", "html")

    for class_, counts in classes.items():
        filename = os.path.join(
            fixtures_dir, "parsing-{0}-with-bs4.html".format(class_))
        # Close the fixture explicitly instead of leaving the handle
        # to be reclaimed by the garbage collector.
        with open(filename) as syllabus:
            syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

        # section count
        self.assertEqual(len(sections), counts[0], msg=class_)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), counts[1], msg=class_)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), counts[2], msg=class_)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), counts[3], msg=class_)
def _assert_parse(self, filename, num_sections, num_lectures, num_resources, num_videos):
    """Check the section/lecture/resource/video counts of one fixture.

    The fixture named ``filename`` is loaded from the ``fixtures/html``
    directory beside this module and handed to
    ``coursera_dl.parse_syllabus``.  Four totals are then compared with the
    expected values, in this order: number of sections, number of lectures
    (items at index 1 of each section), number of resources (sum of the
    lengths of the values in each lecture's index-1 mapping), and the part
    of that sum stored under the ``"mp4"`` key.
    """
    here = os.path.dirname(__file__)
    path = os.path.join(here, "fixtures", "html", filename)
    with open(path) as fixture:
        contents = fixture.read()

    result = coursera_dl.parse_syllabus(None, contents, None)

    # section count
    self.assertEqual(len(result), num_sections)

    # lecture count
    flat_lectures = []
    for sec in result:
        for lec in sec[1]:
            flat_lectures.append(lec)
    self.assertEqual(len(flat_lectures), num_lectures)

    # resource count
    sizes = []
    for lec in flat_lectures:
        for key, value in iteritems(lec[1]):
            sizes.append((key, len(value)))
    total = 0
    for _, size in sizes:
        total += size
    self.assertEqual(total, num_resources)

    # mp4 count
    mp4_total = 0
    for key, size in sizes:
        if key == "mp4":
            mp4_total += size
    self.assertEqual(mp4_total, num_videos)
def test_parse(self):
    """Parse ``self.syllabus_page`` and verify the expected counts.

    Expects 23 sections, 102 lectures, 502 resource entries and 102 of
    those entries keyed ``"mp4"``.  ``self.syllabus_page`` is not set here;
    presumably a fixture loaded in ``setUp`` -- TODO confirm.
    """
    parsed = coursera_dl.parse_syllabus(self.syllabus_page, None)

    # section count
    self.assertEqual(len(parsed), 23)

    # lecture count
    all_lectures = []
    for section in parsed:
        all_lectures.extend(section[1])
    self.assertEqual(len(all_lectures), 102)

    # resource count
    all_resources = []
    for lecture in all_lectures:
        all_resources.extend(lecture[1].items())
    self.assertEqual(len(all_resources), 502)

    # mp4 count
    mp4_total = sum(1 for entry in all_resources if entry[0] == "mp4")
    self.assertEqual(mp4_total, 102)
def test_links_to_wikipedia(self):
    """Parse the ``TEST_LINKS_TO_WIKIPEDIA`` fixture and verify its counts.

    Expects 5 sections, 37 lectures, 158 resource entries and 36 entries
    keyed ``"mp4"``.
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_LINKS_TO_WIKIPEDIA) as syllabus:
        syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 5)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 37)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 158)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 36)
def test_parse(self):
    """Parse the ``TEST_SYLLABUS_FILE`` fixture and verify its counts.

    Expects 23 sections, 102 lectures, 502 resource entries and 102
    entries keyed ``"mp4"``.
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_SYLLABUS_FILE) as syllabus:
        syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 23)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 102)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 502)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 102)
def test_sections_missed2(self):
    """Parse the ``TEST_SECTIONS_NOT_TO_MISS2`` fixture and verify its counts.

    Expects 20 sections, 121 lectures, 382 resource entries and 121
    entries keyed ``"mp4"``.
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_SECTIONS_NOT_TO_MISS2) as syllabus:
        syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 20)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 121)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 382)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 121)
def test_parse_preview(self):
    """Parse the ``TEST_PREVIEW_FILE`` fixture and verify its counts.

    Expects 20 sections, 106 lectures, 106 resource entries and 106
    entries keyed ``"mp4"`` (i.e. every resource entry is an mp4 one).
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_PREVIEW_FILE) as syllabus:
        syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 20)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 106)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 106)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 106)
def test_sections_missed(self):
    """Parse the ``TEST_SECTIONS_NOT_TO_MISS`` fixture and verify its counts.

    The fixture text is stored on ``self.syllabus_page`` before parsing.
    Expects 9 sections, 61 lectures, 224 resource entries and 61 entries
    keyed ``"mp4"``.
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_SECTIONS_NOT_TO_MISS) as syllabus:
        self.syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(self.syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 9)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 61)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 224)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 61)
def xtest_parse_preview(self):
    """Parse the ``TEST_PREVIEW_FILE`` fixture and verify its counts.

    The fixture text is stored on ``self.syllabus_page`` before parsing.
    Expects 20 sections, 106 lectures, 106 resource entries and 106
    entries keyed ``"mp4"``.

    NOTE(review): the name does not start with ``test``, so the default
    unittest loader will not collect it -- presumably disabled on purpose;
    confirm before renaming.
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_PREVIEW_FILE) as syllabus:
        self.syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(self.syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 20)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 106)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 106)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 106)
def test_links_to_wikipedia(self):
    """Parse the ``TEST_LINKS_TO_WIKIPEDIA`` fixture and verify its counts.

    The fixture text is stored on ``self.syllabus_page`` before parsing.
    Expects 5 sections, 37 lectures, 158 resource entries and 36 entries
    keyed ``"mp4"``.
    """
    # Use a context manager so the fixture file is closed deterministically.
    with open(TEST_LINKS_TO_WIKIPEDIA) as syllabus:
        self.syllabus_page = syllabus.read()

    sections = coursera_dl.parse_syllabus(self.syllabus_page, None)

    # section count
    self.assertEqual(len(sections), 5)

    # lecture count
    lectures = [lec for sec in sections for lec in sec[1]]
    self.assertEqual(len(lectures), 37)

    # resource count
    resources = [res for lec in lectures for res in lec[1].items()]
    self.assertEqual(len(resources), 158)

    # mp4 count
    mp4s = [res for res in resources if res[0] == "mp4"]
    self.assertEqual(len(mp4s), 36)
def test_parse(self):
    """Parse ``self.syllabus_page`` and verify section and lecture totals.

    Expects 23 sections and, summed over the index-1 element of every
    section, 102 lectures.  ``self.syllabus_page`` is not set here;
    presumably a fixture loaded in ``setUp`` -- TODO confirm.
    """
    parsed = coursera_dl.parse_syllabus(self.syllabus_page, None)

    # test sections
    section_total = len(parsed)
    self.assertEqual(section_total, 23)

    # test lectures
    lecture_total = 0
    for section in parsed:
        lecture_total += len(section[1])
    self.assertEqual(lecture_total, 102)