示例#1
0
    def _assert_parse(self, filename, num_sections, num_lectures,
                      num_resources, num_videos):
        """Parse an HTML fixture and verify the counts of what was parsed.

        ``filename`` is resolved relative to the ``fixtures/html`` directory
        located next to this test module.  Its contents are handed to
        ``coursera_dl.parse_syllabus`` and the result is checked against:

        * ``num_sections``  -- number of parsed sections;
        * ``num_lectures``  -- number of lectures over all sections;
        * ``num_resources`` -- sum of the lengths of every per-format value
          over all lectures;
        * ``num_videos``    -- the same sum, restricted to the "mp4" format.
        """
        fixture_path = os.path.join(
            os.path.dirname(__file__), "fixtures", "html", filename)

        with open(fixture_path) as fixture:
            page = fixture.read()

        parsed = coursera_dl.parse_syllabus(None, page, None)

        # section count
        self.assertEqual(len(parsed), num_sections)

        # lecture count: the lectures of a section live at index 1
        all_lectures = []
        for section in parsed:
            all_lectures.extend(section[1])
        self.assertEqual(len(all_lectures), num_lectures)

        # Tally every resource and, separately, the mp4 ones in one pass.
        resource_total = 0
        video_total = 0
        for lecture in all_lectures:
            for fmt, entries in iteritems(lecture[1]):
                resource_total += len(entries)
                if fmt == "mp4":
                    video_total += len(entries)

        # resource count
        self.assertEqual(resource_total, num_resources)

        # mp4 count
        self.assertEqual(video_total, num_videos)
示例#2
0
    def test_parse_classes_with_bs4(self):
        """Check the parse counts for several ``*-with-bs4.html`` fixtures.

        ``classes`` maps a class name to the expected
        ``(sections, lectures, resources, mp4s)`` counts.  For each class the
        fixture ``parsing-<class>-with-bs4.html`` under ``fixtures/html`` is
        parsed with ``coursera_dl.parse_syllabus``.  Here a "resource" is one
        (format, value) pair of a lecture, so the resource count is the
        number of such pairs over all lectures.
        """
        classes = {
            'datasci-001': (10, 97, 358, 97),  # issue 134
            'startup-001': (4, 44, 136, 44),   # issue 137
            'wealthofnations-001': (8, 74, 296, 74)  # issue 131
        }

        for class_, counts in classes.items():
            filename = os.path.join(
                os.path.dirname(__file__), "fixtures", "html",
                "parsing-{0}-with-bs4.html".format(class_))

            # A context manager closes each fixture as soon as it is read,
            # instead of leaking one open handle per loop iteration.
            with open(filename) as syllabus:
                syllabus_page = syllabus.read()

            sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

            # section count
            self.assertEqual(len(sections), counts[0])

            # lecture count
            lectures = [lec for sec in sections for lec in sec[1]]
            self.assertEqual(len(lectures), counts[1])

            # resource count
            resources = [res
                         for lec in lectures for res in lec[1].items()]
            self.assertEqual(len(resources), counts[2])

            # mp4 count
            mp4s = [res for res in resources if res[0] == "mp4"]
            self.assertEqual(len(mp4s), counts[3])
示例#3
0
    def _assert_parse(self, filename, num_sections, num_lectures,
                      num_resources, num_videos):
        """Parse a fixture page and compare its counts with the expected ones.

        The fixture is looked up as ``fixtures/html/<filename>`` beside this
        test module and fed to ``coursera_dl.parse_syllabus``.  The expected
        values are, in order: the number of sections, the number of lectures
        in all sections, the summed length of every per-format value of every
        lecture, and that same sum for the "mp4" format only.
        """
        here = os.path.dirname(__file__)
        path = os.path.join(here, "fixtures", "html", filename)

        with open(path) as handle:
            html = handle.read()

        sections = coursera_dl.parse_syllabus(None, html, None)

        # section count
        self.assertEqual(len(sections), num_sections)

        # lecture count
        lectures = []
        for section in sections:
            for lecture in section[1]:
                lectures.append(lecture)
        self.assertEqual(len(lectures), num_lectures)

        # (format, number of entries) for each format of each lecture
        sizes = []
        for lecture in lectures:
            for fmt, entries in iteritems(lecture[1]):
                sizes.append((fmt, len(entries)))

        # resource count
        self.assertEqual(sum(size for _, size in sizes), num_resources)

        # mp4 count
        video_sizes = [size for fmt, size in sizes if fmt == "mp4"]
        self.assertEqual(sum(video_sizes), num_videos)
示例#4
0
    def test_parse(self):
        """Parse ``self.syllabus_page`` and check the expected counts.

        ``self.syllabus_page`` is not set here; presumably a fixture such as
        ``setUp`` provides it (verify against the enclosing class).  A
        "resource" is one (format, value) pair of a lecture.
        """
        parsed = coursera_dl.parse_syllabus(self.syllabus_page, None)

        # section count
        self.assertEqual(len(parsed), 23)

        # lecture count: the lectures of a section live at index 1
        all_lectures = []
        for section in parsed:
            all_lectures.extend(section[1])
        self.assertEqual(len(all_lectures), 102)

        # resource count
        pairs = []
        for lecture in all_lectures:
            pairs.extend(lecture[1].items())
        self.assertEqual(len(pairs), 502)

        # mp4 count
        video_total = sum(1 for pair in pairs if pair[0] == "mp4")
        self.assertEqual(video_total, 102)
示例#5
0
    def test_links_to_wikipedia(self):
        """Check the parse counts for the ``TEST_LINKS_TO_WIKIPEDIA`` fixture.

        The page is parsed with ``coursera_dl.parse_syllabus``; a "resource"
        is one (format, value) pair of a lecture.  Note that the expected
        number of mp4 resources (36) is lower than the number of lectures
        (37).
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_LINKS_TO_WIKIPEDIA) as syllabus:
            syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 5)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 37)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 158)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 36)
示例#6
0
    def test_parse(self):
        """Check the parse counts for the ``TEST_SYLLABUS_FILE`` fixture.

        The page is parsed with ``coursera_dl.parse_syllabus``; a "resource"
        is one (format, value) pair of a lecture.
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_SYLLABUS_FILE) as syllabus:
            syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 23)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 102)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 502)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 102)
示例#7
0
    def test_sections_missed2(self):
        """Check the parse counts for the ``TEST_SECTIONS_NOT_TO_MISS2`` fixture.

        The page is parsed with ``coursera_dl.parse_syllabus``; a "resource"
        is one (format, value) pair of a lecture.
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_SECTIONS_NOT_TO_MISS2) as syllabus:
            syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 20)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 121)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 382)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 121)
示例#8
0
    def test_parse_preview(self):
        """Check the parse counts for the ``TEST_PREVIEW_FILE`` fixture.

        The page is parsed with ``coursera_dl.parse_syllabus``; a "resource"
        is one (format, value) pair of a lecture.  For this fixture the
        expected lecture, resource and mp4 counts are all 106, i.e. one mp4
        resource per lecture and nothing else.
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_PREVIEW_FILE) as syllabus:
            syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(None, syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 20)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 106)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 106)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 106)
示例#9
0
    def test_sections_missed(self):
        """Check the parse counts for the ``TEST_SECTIONS_NOT_TO_MISS`` fixture.

        The page text is stored on ``self.syllabus_page`` and parsed with
        ``coursera_dl.parse_syllabus``; a "resource" is one (format, value)
        pair of a lecture.
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_SECTIONS_NOT_TO_MISS) as syllabus:
            self.syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(self.syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 9)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 61)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 224)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 61)
示例#10
0
    def xtest_parse_preview(self):
        """Check the parse counts for the ``TEST_PREVIEW_FILE`` fixture.

        NOTE(review): the ``x`` prefix keeps this method out of unittest's
        default ``test*`` discovery, so it looks deliberately disabled --
        confirm before renaming.

        The page text is stored on ``self.syllabus_page`` and parsed with
        ``coursera_dl.parse_syllabus``; a "resource" is one (format, value)
        pair of a lecture.
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_PREVIEW_FILE) as syllabus:
            self.syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(self.syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 20)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 106)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 106)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 106)
示例#11
0
    def test_links_to_wikipedia(self):
        """Check the parse counts for the ``TEST_LINKS_TO_WIKIPEDIA`` fixture.

        The page text is stored on ``self.syllabus_page`` and parsed with
        ``coursera_dl.parse_syllabus``; a "resource" is one (format, value)
        pair of a lecture.  Note that the expected number of mp4 resources
        (36) is lower than the number of lectures (37).
        """
        # Close the fixture deterministically instead of leaking the handle.
        with open(TEST_LINKS_TO_WIKIPEDIA) as syllabus:
            self.syllabus_page = syllabus.read()

        sections = coursera_dl.parse_syllabus(self.syllabus_page, None)

        # section count
        self.assertEqual(len(sections), 5)

        # lecture count
        lectures = [lec for sec in sections for lec in sec[1]]
        self.assertEqual(len(lectures), 37)

        # resource count
        resources = [res for lec in lectures for res in lec[1].items()]
        self.assertEqual(len(resources), 158)

        # mp4 count
        mp4s = [res for res in resources if res[0] == "mp4"]
        self.assertEqual(len(mp4s), 36)
示例#12
0
 def test_parse(self):
   """Parse ``self.syllabus_page`` and check section and lecture counts.

   ``self.syllabus_page`` is not set here; presumably a fixture such as
   ``setUp`` provides it (verify against the enclosing class).
   """
   parsed = coursera_dl.parse_syllabus(self.syllabus_page, None)

   # section count
   self.assertEqual(len(parsed), 23)

   # lecture count: the lectures of a section live at index 1
   lecture_total = 0
   for section in parsed:
     lecture_total += len(section[1])
   self.assertEqual(lecture_total, 102)