def is_session_exists(self):
    """Report whether the saved configuration carries a working session.

    Loads the stored configuration, and when it contains cookies, applies
    the access token to the underlying session and probes the courses API.

    Returns:
        A ``(is_exists, conf)`` tuple: ``is_exists`` is True only when the
        probe request succeeded; ``conf`` is whatever ``load_configs()``
        returned.
    """
    conf = load_configs()
    stored_cookies = conf.get("cookies") if conf else None
    if not stored_cookies:
        # Nothing cached (or no cookies cached): there is no session to test.
        return False, conf

    parsed = extract_cookie_string(stored_cookies)
    token = parsed.get("access_token")
    self._session._set_auth_headers(  # pylint: disable=W
        access_token=token, client_id=parsed.get("client_id"))
    self._session._session.cookies.update(  # pylint: disable=W
        {"access_token": token})

    try:
        probe = self._session._get(  # pylint: disable=W
            "https://www.udemy.com/api-2.0/courses/")
        probe.raise_for_status()
    except Exception as error:  # pylint: disable=W
        # Any failure of the probe is treated as an expired session.
        logger.error(
            msg=f"Udemy Says: {error} session cookie seems to be expired..."
        )
        return False, conf
    return True, conf
def _extract_large_course_content(self, url):
    """Fetch a course curriculum page by page and merge the results.

    Used when the single huge-page request is not usable: a trailing page
    size of ``10000`` in ``url`` is swapped for ``50`` and every ``next``
    link is followed, appending each page's ``results`` to the first page.

    Args:
        url: Curriculum API url, typically ending in the page size.

    Returns:
        The JSON dict of the first page with all later ``results`` merged in.

    Exits the process (``sys.exit(0)``) on a connection error.
    """
    huge_page = "10000"
    if url.endswith(huge_page):
        # Rewrite only the trailing page size; str.replace would also touch
        # any other "10000" in the url (for example inside a course id).
        url = url[:-len(huge_page)] + "50"

    try:
        data = self._session._get(url).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)

    _next = data.get("next")
    while _next:
        logger.progress(msg="Downloading course information .. ")
        try:
            resp = self._session._get(_next).json()
        except conn_error as error:
            logger.error(msg=f"Udemy Says: Connection error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        _next = resp.get("next")
        results = resp.get("results")
        if results and isinstance(results, list):
            # setdefault keeps merging working if the first page had no
            # "results" key at all.
            data.setdefault("results", []).extend(results)
    return data
def _extract_m3u8(self, url):
    """Collect one HLS stream description per distinct video height.

    Downloads the master playlist at ``url``, parses it with ``m3u8`` and
    keeps the first variant seen for every height. Variants lacking a
    resolution or codecs entry are ignored. Any error is logged and the
    streams gathered so far are returned.
    """
    streams = []
    try:
        response = self._session._get(url)
        response.raise_for_status()
        master = m3u8.loads(response.text)
        heights_taken = set()
        for variant in master.playlists:
            resolution = variant.stream_info.resolution
            codecs = variant.stream_info.codecs
            if not (resolution and codecs):
                continue
            width, height = resolution
            download_url = variant.uri
            if height in heights_taken:
                continue
            heights_taken.add(height)
            streams.append({
                "type": "hls",
                "height": str(height),
                "width": str(width),
                "extension": "mp4",
                "download_url": download_url,
            })
    except Exception as error:
        logger.error(
            msg=f"Udemy Says : '{error}' while fetching hls streams..")
    return streams
def download_subtitles(self, subtitles, filepath, language="en", keep_vtt=False):
    """Download the subtitle tracks of a lecture and convert them.

    When a language is given, the first subtitle object is asked for the
    tracks matching it. Each track is downloaded into ``filepath``; freshly
    downloaded tracks are passed to ``self.convert``. A keyboard interrupt
    ends the program.
    """
    if language and subtitles:
        subtitles = subtitles[0].get_subtitle(language)
    if not subtitles:
        return

    for track in subtitles:
        title = f"{track.title}.{track.language}"
        filename = os.path.join(filepath, track.filename)
        logger.info(msg="Downloading subtitle(s)", new_line=True, before=True)
        logger.info(msg=f"Downloading ({title})", new_line=True)
        try:
            outcome = track.download(
                filepath=filepath,
                quiet=True,
                callback=self.show_progress,
            )
            status = outcome.get("msg")
            if status == "already downloaded":
                logger.already_downloaded(msg=f"Subtitle : '{title}'")
                continue
            if status == "download":
                logger.info(msg=f"Downloaded ({title})", new_line=True)
                self.convert(filename=filename, keep_vtt=keep_vtt)
                continue
            logger.download_skipped(
                msg=f"Subtitle : '{title}' ", reason=status
            )
        except KeyboardInterrupt:
            logger.error(msg="User Interrupted..", new_line=True)
            sys.exit(0)
def download_assets(self, assets, filepath):
    """Download every supplementary asset of a lecture into ``filepath``.

    Logs the outcome reported by each asset's ``download`` call; a keyboard
    interrupt ends the program.
    """
    if not assets:
        return

    for item in assets:
        title = item.filename
        logger.info(msg="Downloading asset(s)", new_line=True, before=True)
        logger.info(msg=f"Downloading ({title})", new_line=True)
        try:
            outcome = item.download(
                filepath=filepath,
                quiet=True,
                callback=self.show_progress,
            )
            status = outcome.get("msg")
            if status == "already downloaded":
                logger.already_downloaded(msg=f"Asset : '{title}'")
                continue
            if status == "download":
                logger.info(msg=f"Downloaded ({title})", new_line=True)
                continue
            logger.download_skipped(msg=f"Asset : '{title}' ", reason=status)
        except KeyboardInterrupt:
            logger.error(msg="User Interrupted..", new_line=True)
            sys.exit(0)
def download_lecture(self, lecture, filepath, current, total, quality):
    """Download a single lecture, optionally at a requested quality.

    When ``quality`` is given the lecture is first narrowed down with
    ``get_quality``. ``current`` and ``total`` are only used for the
    progress message. A keyboard interrupt ends the program.
    """
    if quality and lecture:
        lecture = lecture.get_quality(quality)
    if not lecture:
        return

    title = lecture.title
    logger.info(msg=f"Lecture(s) : ({current} of {total})",
                new_line=True,
                before=True)
    logger.info(msg=f"Downloading ({title})", new_line=True)
    try:
        outcome = lecture.download(
            filepath=filepath,
            quiet=True,
            callback=self.show_progress,
        )
        status = outcome.get("msg")
        if status == "already downloaded":
            logger.already_downloaded(msg=f"Lecture : '{title}'")
            return
        if status == "download":
            logger.info(msg=f"Downloaded ({title})", new_line=True)
            return
        logger.download_skipped(msg=f"Lecture : '{title}' ", reason=status)
    except KeyboardInterrupt:
        logger.error(msg="User Interrupted..", new_line=True)
        sys.exit(0)
def _login(self, username="", password="", cookies="", cache_session=False):
    """Create an authenticated session from cookies or username/password.

    A cached session is preferred when one exists. Otherwise credentials are
    taken from the arguments, then from the cached configuration, and
    finally requested interactively (access token first, then
    username/password).

    Args:
        username: Account username, may be empty.
        password: Account password, may be empty.
        cookies: Raw cookie string holding the access token, may be empty.
        cache_session: Forwarded to ``UdemyAuth``.

    Returns:
        ``{"login": "successful"}`` when a session object was obtained,
        otherwise ``{"login": "failed"}``.

    Exits the process when no credentials could be obtained at all.
    """
    # check if we already have session on udemy.
    auth = UdemyAuth(cache_session=cache_session)
    is_exists, conf = auth.is_session_exists()
    # The configuration may be missing entirely; treat that as empty so the
    # lookups below cannot fail on None.
    conf = conf or {}

    if is_exists and username and password:
        logger.info(
            msg="Using existing session..",
            new_line=True,
        )
        cookies = conf.get("cookies")

    if not is_exists and not cookies:
        cookies = None
        if not username and not password:
            logger.info(
                msg="Updating session cookie..",
                new_line=True,
            )
            username = conf.get("username")
            password = conf.get("password")
        if not username and not password and not cookies:
            print("")
            cookies = getpass.get_access_token(prompt="Access Token : ")
            if not cookies:
                username = getpass.getuser(prompt="Username : ")
                password = getpass.getpass(prompt="Password : ")
                print("\n")
        if not cookies and not username and not password:
            logger.error(
                msg="You should either provide Fresh Access Token or Username/Password to create new udemy session.."
            )
            sys.exit(0)

    if not cookies:
        auth.username = username
        auth.password = password
        self._session, self._access_token = auth.authenticate()
    if cookies:
        self._cookies = extract_cookie_string(raw_cookies=cookies)
        self._access_token = self._cookies.get("access_token")
        client_id = self._cookies.get("client_id")
        self._session, _ = auth.authenticate(
            access_token=self._access_token, client_id=client_id)
        self._session._session.cookies.update(self._cookies)

    # Callers compare the "login" value against these two literals.
    if self._session is not None:
        return {"login": "successful"}
    return {"login": "failed"}
def _my_courses(self, portal_name):
    """Return the ``results`` list of the "my courses" API for a portal.

    Any connection or decoding problem is logged and ends the program.
    """
    try:
        endpoint = MY_COURSES_URL.format(portal_name=portal_name)
        payload = self._session._get(endpoint).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)
    return payload.get("results", [])
def _fetch_course(self):
    """Log in, collect the urls of all enrolled courses, then log out.

    The urls are stored on ``self._courses``. A failed login ends the
    program.
    """
    auth = {}
    if not self._cookies:
        auth = self._login(username=self._username,
                           password=self._password)
    if not auth and self._cookies:
        auth = self._login(cookies=self._cookies)

    outcome = auth.get("login")
    if outcome == "successful":
        logger.info(msg="Logged in successfully.", new_line=True)
        logger.info(msg="Fetching all enrolled course(s) url(s)..")
        self._courses = self._extract_subscribed_courses()
        time.sleep(1)
        logger.success(msg="Fetching all enrolled course(s) url(s).. ")
        self._logout()
    elif outcome == "failed":
        logger.error(msg="Failed to login ..\n")
        sys.exit(0)
def _extract_subscribed_courses(self):
    """Return the absolute urls of every course the account is enrolled in.

    Walks the paginated subscribed-courses API, following each ``next``
    link, and returns the distinct course urls in first-seen order.
    Connection or decoding problems are logged and end the program.
    """

    def clean_urls(courses):
        """Build absolute, de-duplicated urls from raw course entries."""
        # De-duplicate on the url itself. Hashing whole entries (tuples of
        # their items) fails with TypeError as soon as any field holds a
        # list or dict, and the urls had to be de-duplicated anyway.
        unique = {}
        for entry in courses:
            logger.progress(
                msg="Fetching all enrolled course(s) url(s).. ")
            url = entry.get("url")
            if not url:
                continue
            unique.setdefault(f"https://www.udemy.com{url}", None)
        return list(unique)

    _temp = []
    try:
        response = self._session._get(SUBSCRIBED_COURSES).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)

    _temp.extend(response.get("results", []))
    _next = response.get("next")
    logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
    while _next:
        logger.progress(
            msg="Fetching all enrolled course(s) url(s).. ")
        try:
            resp = self._session._get(_next)
            resp.raise_for_status()
            resp = resp.json()
        except conn_error as error:
            logger.error(msg=f"Udemy Says: Connection error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        except Exception as error:
            logger.error(msg=f"Udemy Says: error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        _next = resp.get("next")
        _temp.extend(resp.get("results", []))

    if _temp:
        _temp = clean_urls(_temp)
    return _temp
def _extract_course_json(self, url, course_id, portal_name):
    """Fetch the curriculum JSON of a course.

    Sets the Referer header to ``url`` and requests the curriculum in one
    go. On a 502/503 answer, or on any non-connection error, the paginated
    fallback ``_extract_large_course_content`` is used instead. A connection
    error ends the program.
    """
    self._session._headers.update({"Referer": url})
    course_url = COURSE_URL.format(portal_name=portal_name,
                                   course_id=course_id)
    try:
        response = self._session._get(course_url)
        if response.status_code in (502, 503):
            return self._extract_large_course_content(url=course_url)
        return response.json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception:
        # Includes undecodable bodies: retry through the paginated route.
        return self._extract_large_course_content(url=course_url)
def _fetch_course(self):
    """Log in and load the basic course information exactly once.

    Populates the id, title, counters and chapter objects from
    ``_real_extract`` and marks the instance as loaded. A failed login ends
    the program.
    """
    if self._have_basic:
        return

    auth = {}
    if not self._cookies:
        auth = self._login(
            username=self._username,
            password=self._password,
            cache_session=self._cache_session,
        )
    if not auth and self._cookies:
        auth = self._login(cookies=self._cookies,
                           cache_session=self._cache_session)

    outcome = auth.get("login")
    if outcome == "successful":
        logger.info(msg="Logged in successfully.", new_line=True)
        logger.info(msg="Downloading course information ..")
        self._info = self._real_extract(
            self._url, skip_hls_stream=self._skip_hls_stream)
        time.sleep(1)
        logger.success(msg="Downloaded course information .. ")

        info = self._info
        access_token = info["access_token"]
        self._id = info["course_id"]
        self._title = info["course_title"]
        self._chapters_count = info["total_chapters"]
        self._total_lectures = info["total_lectures"]
        self._chapters = [
            InternUdemyChapter(chapter, access_token=access_token)
            for chapter in info["chapters"]
        ]

        logger.info(
            msg="Trying to logout now...",
            new_line=True,
        )
        # Cookie based sessions are left alone; only password logins are
        # explicitly logged out.
        if not self._cookies:
            self._logout()
        logger.info(
            msg="Logged out successfully.",
            new_line=True,
        )
        self._have_basic = True
    elif outcome == "failed":
        logger.error(msg="Failed to login ..\n")
        sys.exit(0)
def _subscribed_collection_courses(self, portal_name):
    """Return the courses of all subscribed collections, flattened.

    Connection or decoding problems are logged and end the program.
    """
    endpoint = COLLECTION_URL.format(portal_name=portal_name)
    courses_lists = []
    try:
        payload = self._session._get(endpoint).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)

    collections = payload.get("results", [])
    if collections:
        for collection in collections:
            nested = collection.get("courses", [])
            if nested:
                courses_lists.extend(nested)
    return courses_lists
def extract_cookie_string(raw_cookies):
    """Pull the access token out of a raw cookie string.

    Returns a dict with the single key ``"access_token"``. When the token
    cannot be found (or the input cannot be searched) an error is logged
    and the program ends.
    """
    try:
        found = re.search(
            r"(?i)(?:access_token=(?P<access_token>\w+))", raw_cookies
        )
    except Exception as error:
        logger.error(
            msg=f"Cookies error, {error}, unable to extract access_token from cookies."
        )
        sys.exit(0)

    if not found:
        logger.error(msg="Unable to find access_token, proper cookies required")
        logger.info(
            msg="follow: https://github.com/r0oth3x49/udemy-dl#how-to-login-with-cookie",
            new_line=True,
        )
        sys.stdout.flush()
        sys.exit(0)

    return {"access_token": found.group("access_token")}
def _subscribed_courses(self, portal_name, course_name):
    """Search the account's enrolled courses on a portal by name.

    Sets the Host and Referer headers for the portal before issuing the
    search and returns the ``results`` list. Connection or decoding problems
    are logged and end the program.
    """
    host = "{portal_name}.udemy.com".format(portal_name=portal_name)
    referer = "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}".format(
        portal_name=portal_name, course_name=course_name)
    self._session._headers.update({"Host": host, "Referer": referer})

    url = COURSE_SEARCH.format(portal_name=portal_name,
                               course_name=course_name)
    try:
        payload = self._session._get(url).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        logger.error(msg=f"Udemy Says: {error} on {url}")
        time.sleep(0.8)
        sys.exit(0)
    return payload.get("results", [])
def main():
    """Command line entry point.

    Parses the arguments, resolves credentials (arguments, then cached
    configuration, then interactive prompts), builds the ``Udemy`` object
    and either downloads the requested content or lists it (``--info``).
    Exits the process when no credentials are available.
    """
    sys.stdout.write(banner())
    version = "%(prog)s {version}".format(version=udemy.__version__)
    description = "A cross-platform python based utility to download courses from udemy for personal offline use."
    parser = argparse.ArgumentParser(description=description,
                                     conflict_handler="resolve")
    parser.add_argument(
        "course",
        help="Udemy course or file containing list of course URL(s).",
        type=str,
    )

    general = parser.add_argument_group("General")
    general.add_argument("-h", "--help", action="help", help="Shows the help.")
    general.add_argument("-v",
                         "--version",
                         action="version",
                         version=version,
                         help="Shows the version.")

    authentication = parser.add_argument_group("Authentication")
    authentication.add_argument(
        "-u",
        "--username",
        dest="username",
        type=str,
        help="Username in udemy.",
        metavar="",
    )
    authentication.add_argument(
        "-p",
        "--password",
        dest="password",
        type=str,
        help="Password of your account.",
        metavar="",
    )
    authentication.add_argument(
        "-k",
        "--cookies",
        dest="cookies",
        type=str,
        help="Cookies to authenticate with.",
        metavar="",
    )

    advance = parser.add_argument_group("Advance")
    advance.add_argument(
        "-o",
        "--output",
        dest="output",
        type=str,
        default=os.getcwd(),
        help="Download to specific directory.",
        metavar="",
    )
    advance.add_argument(
        "-q",
        "--quality",
        dest="quality",
        type=int,
        help="Download specific video quality.",
        metavar="",
    )
    advance.add_argument(
        "-c",
        "--chapter",
        dest="chapter",
        type=int,
        help="Download specific chapter from course.",
        metavar="",
    )
    advance.add_argument(
        "-l",
        "--lecture",
        dest="lecture",
        type=int,
        help="Download specific lecture from chapter(s).",
        metavar="",
    )
    advance.add_argument(
        "-s",
        "--sub-lang",
        dest="language",
        type=str,
        help="Download specific subtitle/caption (e.g:- en).",
        metavar="",
        default="en",
    )
    advance.add_argument(
        "--chapter-start",
        dest="chapter_start",
        type=int,
        help="Download from specific position within course.",
        metavar="",
    )
    advance.add_argument(
        "--chapter-end",
        dest="chapter_end",
        type=int,
        help="Download till specific position within course.",
        metavar="",
    )
    advance.add_argument(
        "--lecture-start",
        dest="lecture_start",
        type=int,
        help="Download from specific position within chapter(s).",
        metavar="",
    )
    advance.add_argument(
        "--lecture-end",
        dest="lecture_end",
        type=int,
        help="Download till specific position within chapter(s).",
        metavar="",
    )

    other = parser.add_argument_group("Others")
    other.add_argument(
        "--info",
        dest="info",
        action="store_true",
        help="List all lectures with available resolution.",
    )
    other.add_argument(
        "--cache",
        dest="cache_session",
        action="store_true",
        help="Cache your session to avoid providing again.",
    )
    other.add_argument(
        "--keep-vtt",
        dest="keep_vtt",
        action="store_true",
        help="Keep WebVTT caption(s).",
    )
    other.add_argument(
        "--sub-only",
        dest="caption_only",
        action="store_true",
        help="Download captions/subtitle only.",
    )
    other.add_argument(
        "--skip-sub",
        dest="skip_captions",
        action="store_true",
        help="Download course but skip captions/subtitle.",
    )
    other.add_argument(
        "--skip-hls",
        dest="skip_hls_stream",
        action="store_true",
        help="Download course but skip hls streams. (fast fetching).",
    )
    other.add_argument(
        "--assets-only",
        dest="assets_only",
        action="store_true",
        help="Download asset(s) only.",
    )
    other.add_argument(
        "--skip-assets",
        dest="skip_assets",
        action="store_true",
        help="Download course but skip asset(s).",
    )

    args = parser.parse_args()

    if args.cookies:
        # -k/--cookies names a file; its non-empty lines form the cookie
        # string. The file is opened once, inside the context manager, so
        # no handle is left dangling.
        with open(args.cookies) as f_in:
            stripped = (raw.strip() for raw in f_in)
            cookies = "\n".join([line for line in stripped if line])
        args.cookies = cookies

    if not args.username and not args.password and not args.cookies:
        # check if we already have a session..
        configs = load_configs()
        if not configs:
            # if not ask user for user/pass or access token (cookie)
            args.username = getpass.getuser(prompt="Username : ")
            args.password = getpass.getpass(prompt="Password : ")
            print("\n")
            if not args.username and not args.password:
                print("")
                args.cookies = getpass.get_access_token(
                    prompt="Access Token : ")
                if args.cookies:
                    print("\n")
        if configs:
            cookies = configs.get("cookies")
            if not cookies:
                args.username = configs.get("username")
                args.password = configs.get("password")
            if cookies:
                args.cookies = cookies
            args.quality = args.quality if args.quality else configs.get(
                "quality")
            args.output = args.output if args.output else configs.get("output")
            args.language = args.language if args.language else configs.get(
                "language")

    url_or_courses = extract_url_or_courses(args.course)
    if not args.username and not args.password and not args.cookies:
        print("\n")
        logger.error(
            msg="You should either provide fresh access token or username/password for udemy.."
        )
        sys.exit(0)

    udemy_obj = Udemy(
        url_or_courses=url_or_courses,
        username=args.username,
        password=args.password,
        cookies=args.cookies,
        cache_session=args.cache_session,
    )

    # setting the caching default so that we can avoid future login attemps.
    if args.cache_session:
        _ = to_configs(
            username=args.username,
            password=args.password,
            cookies=args.cookies,
            quality=args.quality,
            output=args.output,
            language=args.language,
        )

    dl_assets = dl_lecture = dl_subtitles = True
    if args.assets_only:
        dl_lecture = False
        dl_subtitles = False
        args.skip_hls_stream = True
    if args.skip_assets:
        dl_assets = False
    if args.caption_only:
        dl_lecture = False
        dl_assets = False
        args.skip_hls_stream = True
    if args.skip_captions:
        dl_subtitles = False

    if not args.info:
        # Qualities above 720 are dropped when hls streams are skipped.
        if args.quality and args.quality > 720 and args.skip_hls_stream:
            args.quality = ""
            logger.warning(
                msg="You cannot use --skip-hls and -q/--quality options togather, considering --skip-hls only.."
            )
        udemy_obj.course_download(
            path=args.output,
            quality=args.quality,
            language=args.language,
            dl_assets=dl_assets,
            dl_lecture=dl_lecture,
            dl_subtitles=dl_subtitles,
            chapter_number=args.chapter,
            chapter_start=args.chapter_start,
            chapter_end=args.chapter_end,
            lecture_number=args.lecture,
            lecture_start=args.lecture_start,
            lecture_end=args.lecture_end,
            keep_vtt=args.keep_vtt,
            skip_hls_stream=args.skip_hls_stream,
        )
    else:
        udemy_obj.course_listdown(
            chapter_number=args.chapter,
            chapter_start=args.chapter_start,
            chapter_end=args.chapter_end,
            lecture_number=args.lecture,
            lecture_start=args.lecture_start,
            lecture_end=args.lecture_end,
            skip_hls_stream=args.skip_hls_stream,
        )


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        logger.error(msg="User Interrupted..", new_line=True)
        sys.exit(0)
def _real_extract(self, url="", skip_hls_stream=False):
    """Build the full course description used by the downloader.

    Resolves the course behind ``url``, fetches its curriculum and groups
    the entries into chapters with their lectures.

    Args:
        url: Course url.
        skip_hls_stream: Forwarded to ``_extract_sources``.

    Returns:
        A dict with the keys ``access_token``, ``course_id``, ``title``,
        ``course_title``, ``chapters``, ``total_chapters`` and
        ``total_lectures``. Every chapter holds its ``lectures`` list and a
        ``lectures_count``.

    Exits the process when the curriculum answer carries a ``detail``
    message instead of results.
    """
    _udemy = {}
    course_id, course_info = self._extract_course_info(url)

    if course_info and isinstance(course_info, dict):
        title = self._clean(course_info.get("title"))
        course_title = course_info.get("published_title")
        portal_name = course_info.get("portal_name")

    course_json = self._extract_course_json(url, course_id, portal_name)
    course = course_json.get("results")
    resource = course_json.get("detail")

    if resource:
        if not self._cookies:
            logger.error(
                msg=f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds"
            )
        if self._cookies:
            logger.error(
                msg=f"Udemy Says : '{resource}' cookies seems to be expired"
            )
        logger.info(
            msg="Trying to logout now...",
            new_line=True,
        )
        if not self._cookies:
            self._logout()
        logger.info(
            msg="Logged out successfully.",
            new_line=True,
        )
        sys.exit(0)

    _udemy["access_token"] = self._access_token
    _udemy["course_id"] = course_id
    _udemy["title"] = title
    _udemy["course_title"] = course_title
    _udemy["chapters"] = []

    # Index of the chapter currently being filled; -1 until one exists.
    counter = -1

    if course:
        lecture_counter = 0
        for entry in course:
            clazz = entry.get("_class")
            asset = entry.get("asset")
            supp_assets = entry.get("supplementary_assets")

            if clazz == "chapter":
                lecture_counter = 0
                lectures = []
                chapter_index = entry.get("object_index")
                chapter_title = "{0:02d} ".format(
                    chapter_index) + self._clean(entry.get("title"))
                # NOTE(review): "chapters" holds dicts, so this string
                # membership test looks always true; kept unchanged.
                if chapter_title not in _udemy["chapters"]:
                    _udemy["chapters"].append({
                        "chapter_title": chapter_title,
                        "chapter_id": entry.get("id"),
                        "chapter_index": chapter_index,
                        "lectures": [],
                    })
                    counter += 1
            elif clazz == "lecture":
                lecture_counter += 1
                lecture_id = entry.get("id")
                if len(_udemy["chapters"]) == 0:
                    # A lecture before any chapter: open a chapter named
                    # after the lecture itself.
                    lectures = []
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} ".format(
                        chapter_index) + self._clean(entry.get("title"))
                    if chapter_title not in _udemy["chapters"]:
                        _udemy["chapters"].append({
                            "chapter_title": chapter_title,
                            "chapter_id": lecture_id,
                            "chapter_index": chapter_index,
                            "lectures": [],
                        })
                        counter += 1

                if lecture_id:
                    retVal = []

                    if isinstance(asset, dict):
                        # Pick whichever key is present before lowering;
                        # calling .lower() on a missing "asset_type" would
                        # raise before the "assetType" fallback is tried.
                        asset_type = (asset.get("asset_type")
                                      or asset.get("assetType")
                                      or "").lower()
                        if asset_type == "article":
                            if (isinstance(supp_assets, list)
                                    and len(supp_assets) > 0):
                                retVal = self._extract_supplementary_assets(
                                    supp_assets)
                        elif asset_type == "video":
                            if (isinstance(supp_assets, list)
                                    and len(supp_assets) > 0):
                                retVal = self._extract_supplementary_assets(
                                    supp_assets)
                        elif asset_type == "e-book":
                            retVal = self._extract_ebook(asset)
                        elif asset_type == "file":
                            retVal = self._extract_file(asset)
                        elif asset_type == "presentation":
                            retVal = self._extract_ppt(asset)
                        elif asset_type == "audio":
                            retVal = self._extract_audio(asset)

                    logger.progress(
                        msg="Downloading course information .. ")
                    lecture_index = entry.get("object_index")
                    lecture_title = "{0:03d} ".format(
                        lecture_counter) + self._clean(entry.get("title"))
                    data = asset.get("stream_urls")
                    if data and isinstance(data, dict):
                        # Streamable lecture: collect sources and captions.
                        sources = data.get("Video")
                        tracks = asset.get("captions")
                        duration = asset.get("time_estimation")
                        sources = self._extract_sources(
                            sources, skip_hls_stream=skip_hls_stream)
                        subtitles = self._extract_subtitles(tracks)
                        sources_count = len(sources)
                        subtitle_count = len(subtitles)
                        lectures.append({
                            "index": lecture_counter,
                            "lecture_index": lecture_index,
                            "lectures_id": lecture_id,
                            "lecture_title": lecture_title,
                            "duration": duration,
                            "assets": retVal,
                            "assets_count": len(retVal),
                            "sources": sources,
                            "subtitles": subtitles,
                            "subtitle_count": subtitle_count,
                            "sources_count": sources_count,
                        })
                    else:
                        # No streams: store the asset body as html content.
                        lectures.append({
                            "index": lecture_counter,
                            "lecture_index": lecture_index,
                            "lectures_id": lecture_id,
                            "lecture_title": lecture_title,
                            "html_content": asset.get("body"),
                            "extension": "html",
                            "assets": retVal,
                            "assets_count": len(retVal),
                            "subtitle_count": 0,
                            "sources_count": 0,
                        })

                _udemy["chapters"][counter]["lectures"] = lectures
                _udemy["chapters"][counter]["lectures_count"] = len(
                    lectures)
            elif clazz == "quiz":
                lecture_id = entry.get("id")
                if len(_udemy["chapters"]) == 0:
                    lectures = []
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} ".format(
                        chapter_index) + self._clean(entry.get("title"))
                    if chapter_title not in _udemy["chapters"]:
                        lecture_counter = 0
                        _udemy["chapters"].append({
                            "chapter_title": chapter_title,
                            "chapter_id": lecture_id,
                            "chapter_index": chapter_index,
                            "lectures": [],
                        })
                        counter += 1

                _udemy["chapters"][counter]["lectures"] = lectures
                _udemy["chapters"][counter]["lectures_count"] = len(
                    lectures)

    _udemy["total_chapters"] = len(_udemy["chapters"])
    _udemy["total_lectures"] = sum([
        entry.get("lectures_count", 0) for entry in _udemy["chapters"]
        if entry
    ])

    return _udemy