Пример #1
0
 def _extract_large_course_content(self, url):
     url = url.replace("10000", "50") if url.endswith("10000") else url
     try:
         data = self._session._get(url).json()
     except conn_error as error:
         logger.error(msg=f"Udemy Says: Connection error, {error}")
         time.sleep(0.8)
         sys.exit(0)
     else:
         _next = data.get("next")
         while _next:
             logger.progress(msg="Downloading course information .. ")
             try:
                 resp = self._session._get(_next).json()
             except conn_error as error:
                 logger.error(msg=f"Udemy Says: Connection error, {error}")
                 time.sleep(0.8)
                 sys.exit(0)
             else:
                 _next = resp.get("next")
                 results = resp.get("results")
                 if results and isinstance(results, list):
                     for d in resp["results"]:
                         data["results"].append(d)
         return data
Пример #2
0
    def _extract_subscribed_courses(self):
        """Return the absolute URLs of every course listed as subscribed.

        Requests ``SUBSCRIBED_COURSES``, follows each payload's "next" link
        until there is none, gathers all "results" entries and converts
        them to unique "https://www.udemy.com<url>" strings.

        Returns:
            A list of unique course URLs in first-seen order; an empty list
            when no entries were returned.

        Note:
            Any request or decoding failure is logged and the process is
            terminated with ``sys.exit(0)``.
        """

        def clean_urls(courses):
            # A dict keyed by URL de-duplicates while keeping first-seen
            # order, and never hashes the entry values themselves (which
            # may be lists or dicts and would make a set() of items fail).
            unique = {}
            for entry in courses:
                logger.progress(
                    msg="Fetching all enrolled course(s) url(s).. ")
                url = entry.get("url")
                if not url:
                    continue
                unique[f"https://www.udemy.com{url}"] = None
            return list(unique)

        _temp = []
        try:
            response = self._session._get(SUBSCRIBED_COURSES).json()
        except conn_error as error:
            logger.error(msg=f"Udemy Says: Connection error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        except Exception as error:
            logger.error(msg=f"Udemy Says: {error}")
            time.sleep(0.8)
            sys.exit(0)

        # "results" may be missing or null; treat both as an empty page.
        _temp.extend(response.get("results") or [])
        _next = response.get("next")
        logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
        while _next:
            logger.progress(
                msg="Fetching all enrolled course(s) url(s).. ")
            try:
                resp = self._session._get(_next)
                resp.raise_for_status()
                resp = resp.json()
            except conn_error as error:
                logger.error(msg=f"Udemy Says: Connection error, {error}")
                time.sleep(0.8)
                sys.exit(0)
            except Exception as error:
                logger.error(msg=f"Udemy Says: error, {error}")
                time.sleep(0.8)
                sys.exit(0)

            _next = resp.get("next")
            _temp.extend(resp.get("results") or [])

        if _temp:
            _temp = clean_urls(_temp)
        return _temp
Пример #3
0
 def clean_urls(courses):
     """Build the unique absolute Udemy URLs for the given course entries.

     Args:
         courses: Iterable of dict entries; each may hold a site-relative
             path under the "url" key.

     Returns:
         A list of "https://www.udemy.com<url>" strings without duplicates,
         in first-seen order. Entries with a missing or empty "url" are
         skipped.
     """
     # A dict keyed by URL de-duplicates while keeping first-seen order, and
     # never hashes the entry values themselves (which may be lists or dicts
     # and would make a set() of item tuples raise TypeError).
     unique = {}
     for entry in courses:
         logger.progress(
             msg="Fetching all enrolled course(s) url(s).. ")
         url = entry.get("url")
         if not url:
             continue
         unique[f"https://www.udemy.com{url}"] = None
     return list(unique)
Пример #4
0
    def _real_extract(self, url="", skip_hls_stream=False):

        _udemy = {}
        course_id, course_info = self._extract_course_info(url)

        if course_info and isinstance(course_info, dict):
            title = self._clean(course_info.get("title"))
            course_title = course_info.get("published_title")
            portal_name = course_info.get("portal_name")

        course_json = self._extract_course_json(url, course_id, portal_name)
        course = course_json.get("results")
        resource = course_json.get("detail")

        if resource:
            if not self._cookies:
                logger.error(
                    msg=
                    f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds"
                )
            if self._cookies:
                logger.error(
                    msg=f"Udemy Says : '{resource}' cookies seems to be expired"
                )
            logger.info(
                msg="Trying to logout now...",
                new_line=True,
            )
            if not self._cookies:
                self._logout()
            logger.info(
                msg="Logged out successfully.",
                new_line=True,
            )
            sys.exit(0)

        _udemy["access_token"] = self._access_token
        _udemy["course_id"] = course_id
        _udemy["title"] = title
        _udemy["course_title"] = course_title
        _udemy["chapters"] = []

        counter = -1

        if course:
            lecture_counter = 0
            for entry in course:
                clazz = entry.get("_class")
                asset = entry.get("asset")
                supp_assets = entry.get("supplementary_assets")

                if clazz == "chapter":
                    lecture_counter = 0
                    lectures = []
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} ".format(
                        chapter_index) + self._clean(entry.get("title"))
                    if chapter_title not in _udemy["chapters"]:
                        _udemy["chapters"].append({
                            "chapter_title": chapter_title,
                            "chapter_id": entry.get("id"),
                            "chapter_index": chapter_index,
                            "lectures": [],
                        })
                        counter += 1
                elif clazz == "lecture":
                    lecture_counter += 1
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + self._clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1

                    if lecture_id:

                        retVal = []

                        if isinstance(asset, dict):
                            asset_type = (asset.get("asset_type").lower()
                                          or asset.get("assetType").lower())
                            if asset_type == "article":
                                if (isinstance(supp_assets, list)
                                        and len(supp_assets) > 0):
                                    retVal = self._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "video":
                                if (isinstance(supp_assets, list)
                                        and len(supp_assets) > 0):
                                    retVal = self._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "e-book":
                                retVal = self._extract_ebook(asset)
                            elif asset_type == "file":
                                retVal = self._extract_file(asset)
                            elif asset_type == "presentation":
                                retVal = self._extract_ppt(asset)
                            elif asset_type == "audio":
                                retVal = self._extract_audio(asset)

                        logger.progress(
                            msg="Downloading course information .. ")
                        lecture_index = entry.get("object_index")
                        lecture_title = "{0:03d} ".format(
                            lecture_counter) + self._clean(entry.get("title"))
                        data = asset.get("stream_urls")
                        if data and isinstance(data, dict):
                            sources = data.get("Video")
                            tracks = asset.get("captions")
                            duration = asset.get("time_estimation")
                            sources = self._extract_sources(
                                sources, skip_hls_stream=skip_hls_stream)
                            subtitles = self._extract_subtitles(tracks)
                            sources_count = len(sources)
                            subtitle_count = len(subtitles)
                            lectures.append({
                                "index": lecture_counter,
                                "lecture_index": lecture_index,
                                "lectures_id": lecture_id,
                                "lecture_title": lecture_title,
                                "duration": duration,
                                "assets": retVal,
                                "assets_count": len(retVal),
                                "sources": sources,
                                "subtitles": subtitles,
                                "subtitle_count": subtitle_count,
                                "sources_count": sources_count,
                            })
                        else:
                            lectures.append({
                                "index": lecture_counter,
                                "lecture_index": lecture_index,
                                "lectures_id": lecture_id,
                                "lecture_title": lecture_title,
                                "html_content": asset.get("body"),
                                "extension": "html",
                                "assets": retVal,
                                "assets_count": len(retVal),
                                "subtitle_count": 0,
                                "sources_count": 0,
                            })

                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lectures_count"] = len(
                        lectures)
                elif clazz == "quiz":
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + self._clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            lecture_counter = 0
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1
                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lectures_count"] = len(
                        lectures)
            _udemy["total_chapters"] = len(_udemy["chapters"])
            _udemy["total_lectures"] = sum([
                entry.get("lectures_count", 0) for entry in _udemy["chapters"]
                if entry
            ])

        return _udemy