def _extract_large_course_content(self, url): url = url.replace("10000", "50") if url.endswith("10000") else url try: data = self._session._get(url).json() except conn_error as error: logger.error(msg=f"Udemy Says: Connection error, {error}") time.sleep(0.8) sys.exit(0) else: _next = data.get("next") while _next: logger.progress(msg="Downloading course information .. ") try: resp = self._session._get(_next).json() except conn_error as error: logger.error(msg=f"Udemy Says: Connection error, {error}") time.sleep(0.8) sys.exit(0) else: _next = resp.get("next") results = resp.get("results") if results and isinstance(results, list): for d in resp["results"]: data["results"].append(d) return data
def _extract_subscribed_courses(self):
    """Return the absolute URLs of all courses listed by SUBSCRIBED_COURSES.

    The first page is requested from ``SUBSCRIBED_COURSES``; ``next`` links
    are followed until exhausted. Every entry's relative ``url`` is turned
    into an absolute ``https://www.udemy.com`` URL.

    Returns:
        A list of unique course URLs in first-seen order (empty when the
        listing has no results).

    Raises:
        SystemExit: after logging, when any request or JSON decode fails.
    """

    def clean_urls(courses):
        """Build unique absolute course URLs from the raw course entries."""
        _urls = []
        for entry in courses:
            logger.progress(
                msg="Fetching all enrolled course(s) url(s).. ")
            url = entry.get("url")
            if not url:
                continue
            _urls.append(f"https://www.udemy.com{url}")
        # De-duplicate on the URL itself. Hashing whole entries (as a set of
        # item tuples) raises TypeError as soon as an entry holds a list or
        # dict value, and the URL-level pass made it redundant anyway.
        return list(dict.fromkeys(_urls))

    _temp = []
    try:
        response = self._session._get(SUBSCRIBED_COURSES).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        # Covers JSON decoding failures (ValueError) and anything else.
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)

    # "or []" also guards against an explicit null "results" value.
    _temp.extend(response.get("results") or [])
    _next = response.get("next")
    logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
    while _next:
        logger.progress(
            msg="Fetching all enrolled course(s) url(s).. ")
        try:
            resp = self._session._get(_next)
            resp.raise_for_status()
            resp = resp.json()
        except conn_error as error:
            logger.error(msg=f"Udemy Says: Connection error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        except Exception as error:
            logger.error(msg=f"Udemy Says: error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        _next = resp.get("next")
        _temp.extend(resp.get("results") or [])

    if _temp:
        _temp = clean_urls(_temp)
    return _temp
def clean_urls(courses):
    """Build unique absolute Udemy course URLs from raw course entries.

    Args:
        courses: Iterable of mappings; each entry's ``url`` value (a path
            relative to ``https://www.udemy.com``) is used, entries without
            one are skipped.

    Returns:
        A list of unique absolute URLs in first-seen order.
    """
    _urls = []
    for entry in courses:
        logger.progress(
            msg="Fetching all enrolled course(s) url(s).. ")
        url = entry.get("url")
        if not url:
            continue
        _urls.append(f"https://www.udemy.com{url}")
    # De-duplicate on the URL itself. Hashing whole entries (as a set of item
    # tuples) raises TypeError as soon as an entry holds a list or dict
    # value, and the URL-level pass made that step redundant anyway.
    return list(dict.fromkeys(_urls))
def _real_extract(self, url="", skip_hls_stream=False): _udemy = {} course_id, course_info = self._extract_course_info(url) if course_info and isinstance(course_info, dict): title = self._clean(course_info.get("title")) course_title = course_info.get("published_title") portal_name = course_info.get("portal_name") course_json = self._extract_course_json(url, course_id, portal_name) course = course_json.get("results") resource = course_json.get("detail") if resource: if not self._cookies: logger.error( msg= f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds" ) if self._cookies: logger.error( msg=f"Udemy Says : '{resource}' cookies seems to be expired" ) logger.info( msg="Trying to logout now...", new_line=True, ) if not self._cookies: self._logout() logger.info( msg="Logged out successfully.", new_line=True, ) sys.exit(0) _udemy["access_token"] = self._access_token _udemy["course_id"] = course_id _udemy["title"] = title _udemy["course_title"] = course_title _udemy["chapters"] = [] counter = -1 if course: lecture_counter = 0 for entry in course: clazz = entry.get("_class") asset = entry.get("asset") supp_assets = entry.get("supplementary_assets") if clazz == "chapter": lecture_counter = 0 lectures = [] chapter_index = entry.get("object_index") chapter_title = "{0:02d} ".format( chapter_index) + self._clean(entry.get("title")) if chapter_title not in _udemy["chapters"]: _udemy["chapters"].append({ "chapter_title": chapter_title, "chapter_id": entry.get("id"), "chapter_index": chapter_index, "lectures": [], }) counter += 1 elif clazz == "lecture": lecture_counter += 1 lecture_id = entry.get("id") if len(_udemy["chapters"]) == 0: lectures = [] chapter_index = entry.get("object_index") chapter_title = "{0:02d} ".format( chapter_index) + self._clean(entry.get("title")) if chapter_title not in _udemy["chapters"]: _udemy["chapters"].append({ "chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": [], }) 
counter += 1 if lecture_id: retVal = [] if isinstance(asset, dict): asset_type = (asset.get("asset_type").lower() or asset.get("assetType").lower()) if asset_type == "article": if (isinstance(supp_assets, list) and len(supp_assets) > 0): retVal = self._extract_supplementary_assets( supp_assets) elif asset_type == "video": if (isinstance(supp_assets, list) and len(supp_assets) > 0): retVal = self._extract_supplementary_assets( supp_assets) elif asset_type == "e-book": retVal = self._extract_ebook(asset) elif asset_type == "file": retVal = self._extract_file(asset) elif asset_type == "presentation": retVal = self._extract_ppt(asset) elif asset_type == "audio": retVal = self._extract_audio(asset) logger.progress( msg="Downloading course information .. ") lecture_index = entry.get("object_index") lecture_title = "{0:03d} ".format( lecture_counter) + self._clean(entry.get("title")) data = asset.get("stream_urls") if data and isinstance(data, dict): sources = data.get("Video") tracks = asset.get("captions") duration = asset.get("time_estimation") sources = self._extract_sources( sources, skip_hls_stream=skip_hls_stream) subtitles = self._extract_subtitles(tracks) sources_count = len(sources) subtitle_count = len(subtitles) lectures.append({ "index": lecture_counter, "lecture_index": lecture_index, "lectures_id": lecture_id, "lecture_title": lecture_title, "duration": duration, "assets": retVal, "assets_count": len(retVal), "sources": sources, "subtitles": subtitles, "subtitle_count": subtitle_count, "sources_count": sources_count, }) else: lectures.append({ "index": lecture_counter, "lecture_index": lecture_index, "lectures_id": lecture_id, "lecture_title": lecture_title, "html_content": asset.get("body"), "extension": "html", "assets": retVal, "assets_count": len(retVal), "subtitle_count": 0, "sources_count": 0, }) _udemy["chapters"][counter]["lectures"] = lectures _udemy["chapters"][counter]["lectures_count"] = len( lectures) elif clazz == "quiz": lecture_id = 
entry.get("id") if len(_udemy["chapters"]) == 0: lectures = [] chapter_index = entry.get("object_index") chapter_title = "{0:02d} ".format( chapter_index) + self._clean(entry.get("title")) if chapter_title not in _udemy["chapters"]: lecture_counter = 0 _udemy["chapters"].append({ "chapter_title": chapter_title, "chapter_id": lecture_id, "chapter_index": chapter_index, "lectures": [], }) counter += 1 _udemy["chapters"][counter]["lectures"] = lectures _udemy["chapters"][counter]["lectures_count"] = len( lectures) _udemy["total_chapters"] = len(_udemy["chapters"]) _udemy["total_lectures"] = sum([ entry.get("lectures_count", 0) for entry in _udemy["chapters"] if entry ]) return _udemy