Пример #1
0
    def get_downloadable_content(self, course_url):
        """
        returns {"types" : {"class_name":"link", "class_name": "link"}, "arko_type": {"class_name":"link", "class_name": "link"}}
        """
        course_name = self.get_course_name_from_url(course_url)
        long_course_name = COURSES_DICT.get(course_name, course_name)

        print "* Collecting downloadable content from " + course_url

        # get the course name, and redirect to the course lecture page
        vidpage = self.browser.open(course_url)

        # extract the weekly classes
        soup = BeautifulSoup(vidpage)
        headers = soup.find("div", {"class": "wtabs extl"})

        head_names = headers.findAll("h2")
        resources = {}
        for head_name in head_names:
            ul = head_name.findNextSibling('ul')
            lis = ul.findAll('li')

            weeklyClasses = {}
            classNames = []
            for li in lis:
                className = li.a.text
                classNames.append(className)
                hrefs = li.find('a')
                resourceLink = hrefs['href']
                weeklyClasses[className] = resourceLink
            resources[head_name.text] = weeklyClasses
        return resources
Пример #2
0
    def download_course(self, cname, dest_dir="."):
        """Download all the contents (quizzes, videos, lecture notes, ...) of the course to the given destination directory (defaults to .)"""

        download_url = self.get_download_url_from_name(cname)
        print "* Need to download from ", download_url

        resource_dict = self.get_downloadable_content(download_url)

        long_cname = COURSES_DICT.get(cname, cname)
        print '* Got all downloadable content for ' + long_cname

        course_dir = os.path.abspath(os.path.join(dest_dir, long_cname))

        # ensure the target dir exists
        if not os.path.exists(course_dir):
            os.mkdir(course_dir)

        print "* " + cname + " will be downloaded to " + course_dir

        # download the standard pages
        print " - Downloading zipped/videos pages"

        for types, download_dict in resource_dict.iteritems():
            # ensure the course directory exists
            resource_dir = os.path.join(course_dir, types)
            if not os.path.exists(resource_dir):
                os.makedirs(resource_dir)
            print " -- Downloading ", types
            for fname, tfname in download_dict.iteritems():
                try:
                    print "    * Downloading ", fname, "..."
                    download_file(tfname, resource_dir, fname)
                except Exception as e:
                    print "     - failed ", fname, e
Пример #3
0
    def download_course(self, cname, dest_dir="."):
        """Download all the contents (quizzes, videos, lecture notes, ...) of the course to the given destination directory (defaults to .)"""

        download_url = self.get_download_url_from_name(cname)
        print "* Need to download from ", download_url

        resource_dict = self.get_downloadable_content(download_url)

        long_cname = COURSES_DICT.get(cname, cname)
        print '* Got all downloadable content for ' + long_cname

        course_dir = os.path.abspath(os.path.join(dest_dir, long_cname))

        # ensure the target dir exists
        if not os.path.exists(course_dir):
            os.mkdir(course_dir)

        print "* " + cname + " will be downloaded to " + course_dir

        # download the standard pages
        print " - Downloading zipped/videos pages"

        for types, download_dict in resource_dict.iteritems():
            # ensure the course directory exists
            resource_dir = os.path.join(course_dir, types)
            if not os.path.exists(resource_dir):
                os.makedirs(resource_dir)
            print " -- Downloading ", types
            for fname, tfname in download_dict.iteritems():
                try:
                    print "    * Downloading ", fname, "..."
                    self.download(tfname, target_dir=resource_dir, target_fname=fname)
                except Exception as e:
                    print "     - failed ", fname, e
Пример #4
0
    def get_downloadable_content(self, course_url):
        """
        returns {"types" : {"class_name":"link", "class_name": "link"}, "arko_type": {"class_name":"link", "class_name": "link"}}
        """
        course_name = self.get_course_name_from_url(course_url)
        long_course_name = COURSES_DICT.get(course_name, course_name)

        print "* Collecting downloadable content from " + course_url

        # get the course name, and redirect to the course lecture page
        vidpage = self.browser.open(course_url)

        # extract the weekly classes
        soup = BeautifulSoup(vidpage)
        headers = soup.find("div", { "class" : "wtabs extl" })

        head_names = headers.findAll("h2")
        resources = {}
        for head_name in head_names:
            ul = head_name.findNextSibling('ul')
            lis = ul.findAll('li')

            weeklyClasses = {}
            classNames = []
            for li in lis:
                className = li.a.text
                classNames.append(className)
                hrefs = li.find('a')
                resourceLink = hrefs['href']
                while className in weeklyClasses:
                    className += "."
                weeklyClasses[className] = resourceLink
            headText = head_name.text
            while headText in resources:
                headText += "."
            resources[headText] = weeklyClasses
        return resources