def get_downloadable_content(self, course_url):
    """Scrape the course page and collect its downloadable resources.

    Opens ``course_url`` with ``self.browser``, locates the
    ``<div class="wtabs extl">`` container and, for every ``<h2>`` heading
    inside it, reads the links of the ``<ul>`` that follows the heading.

    Returns a dict of dicts, keyed first by heading text and then by
    link text:

        {"heading": {"link text": "href", ...}, ...}

    A heading or link text that repeats gets "." appended until it is
    unique, so a later entry never overwrites an earlier one.  Headings
    without a following ``<ul>`` and list items without a link (or without
    an ``href``) are skipped instead of aborting the whole scrape.
    """
    # NOTE(review): the return value is not used in this method; the call is
    # kept in case the helper validates the URL -- confirm and drop if pure.
    self.get_course_name_from_url(course_url)

    print("* Collecting downloadable content from " + course_url)

    page = self.browser.open(course_url)
    soup = BeautifulSoup(page)
    # find() yields None when the container is absent; the attribute access
    # below then raises AttributeError, exactly as it did before.
    container = soup.find("div", {"class": "wtabs extl"})

    resources = {}
    for heading in container.findAll("h2"):
        link_list = heading.findNextSibling('ul')
        if link_list is None:
            continue

        section = {}
        for item in link_list.findAll('li'):
            anchor = item.find('a')
            href = anchor.get('href') if anchor is not None else None
            if href is None:
                continue
            label = anchor.text
            # keep every link even when two share the same text
            while label in section:
                label += "."
            section[label] = href

        title = heading.text
        # keep every section even when two headings share the same text
        while title in resources:
            title += "."
        resources[title] = section
    return resources
def download_course(self, cname, dest_dir="."): """Download all the contents (quizzes, videos, lecture notes, ...) of the course to the given destination directory (defaults to .)""" download_url = self.get_download_url_from_name(cname) print "* Need to download from ", download_url resource_dict = self.get_downloadable_content(download_url) long_cname = COURSES_DICT.get(cname, cname) print '* Got all downloadable content for ' + long_cname course_dir = os.path.abspath(os.path.join(dest_dir, long_cname)) # ensure the target dir exists if not os.path.exists(course_dir): os.mkdir(course_dir) print "* " + cname + " will be downloaded to " + course_dir # download the standard pages print " - Downloading zipped/videos pages" for types, download_dict in resource_dict.iteritems(): # ensure the course directory exists resource_dir = os.path.join(course_dir, types) if not os.path.exists(resource_dir): os.makedirs(resource_dir) print " -- Downloading ", types for fname, tfname in download_dict.iteritems(): try: print " * Downloading ", fname, "..." download_file(tfname, resource_dir, fname) except Exception as e: print " - failed ", fname, e
def download_course(self, cname, dest_dir="."): """Download all the contents (quizzes, videos, lecture notes, ...) of the course to the given destination directory (defaults to .)""" download_url = self.get_download_url_from_name(cname) print "* Need to download from ", download_url resource_dict = self.get_downloadable_content(download_url) long_cname = COURSES_DICT.get(cname, cname) print '* Got all downloadable content for ' + long_cname course_dir = os.path.abspath(os.path.join(dest_dir, long_cname)) # ensure the target dir exists if not os.path.exists(course_dir): os.mkdir(course_dir) print "* " + cname + " will be downloaded to " + course_dir # download the standard pages print " - Downloading zipped/videos pages" for types, download_dict in resource_dict.iteritems(): # ensure the course directory exists resource_dir = os.path.join(course_dir, types) if not os.path.exists(resource_dir): os.makedirs(resource_dir) print " -- Downloading ", types for fname, tfname in download_dict.iteritems(): try: print " * Downloading ", fname, "..." self.download(tfname, target_dir=resource_dir, target_fname=fname) except Exception as e: print " - failed ", fname, e
def get_downloadable_content(self, course_url):
    """Return the downloadable links of a course page, grouped by section.

    The page at ``course_url`` is fetched through ``self.browser`` and
    parsed with BeautifulSoup.  Inside ``<div class="wtabs extl">`` every
    ``<h2>`` names a section, and the ``<ul>`` following it lists that
    section's links.

    The result maps section heading -> {link text -> href}:

        {"heading": {"link text": "href", ...}, ...}

    Keys are made unique by appending "." to a heading or link text that
    was already seen, so duplicates are kept rather than overwritten.
    A heading with no following ``<ul>``, and a list item with no link or
    no ``href``, is skipped.
    """
    def unused_key(key, taken):
        # append "." until the key does not collide with an existing one
        while key in taken:
            key += "."
        return key

    # NOTE(review): result unused in this method; call retained in case the
    # helper rejects malformed URLs -- confirm and remove if it is pure.
    self.get_course_name_from_url(course_url)

    print("* Collecting downloadable content from " + course_url)

    soup = BeautifulSoup(self.browser.open(course_url))
    # If the container is missing find() gives None and the loop header
    # below raises AttributeError, as the code always has.
    tabs = soup.find("div", {"class": "wtabs extl"})

    resources = {}
    for head in tabs.findAll("h2"):
        listing = head.findNextSibling('ul')
        if listing is None:
            continue

        weekly = {}
        for entry in listing.findAll('li'):
            link = entry.find('a')
            if link is None:
                continue
            target = link.get('href')
            if target is None:
                continue
            weekly[unused_key(link.text, weekly)] = target

        resources[unused_key(head.text, resources)] = weekly
    return resources