Exemplo n.º 1
0
def _get_playlist_videos_html(playlist):
    """
    Get the list of videos of a playlist using HTML website content.

    :param playlist: playlist identifier, used as the ``list`` query value of the playlist page URL
    :return: list of video dictionaries holding the ``thumb``, ``id``, ``url`` and ``title`` keys;
             the list is empty when the response status is not 200 or a ConnectionError is caught
    """
    result = []
    url = "https://www.youtube.com/playlist?list=%s" % playlist
    try:
        response = requests.get(url)
        if response is None or response.status_code != 200:
            return result
        formatter = BeautifulSoup(response.text, "html.parser")
        for item in formatter.find_all(attrs={"class": "pl-video"}):
            try:
                video = dict()
                image = item.find(attrs={"class": "yt-thumb-clip"}).find("img")
                # Prefer the "data-thumb" attribute and fall back to "src" when it is absent.
                if image.get("data-thumb") is not None:
                    video["thumb"] = image["data-thumb"]
                else:
                    video["thumb"] = image["src"]
                anchor = item.find(attrs={"class": "pl-video-title"}).find("a")
                href = anchor["href"]
                # Only the first element of the _getvid result is used here.
                video["id"] = _getvid(href)[0]
                video["url"] = 'https://www.youtube.com%s' % href
                video["title"] = anchor.text.strip()
                result.append(video)
            except Exception as ex:
                # A malformed entry is skipped with a warning. Exception (not
                # BaseException) keeps KeyboardInterrupt and SystemExit propagating.
                commons.warn("Error extracting video from playlist: %s" %
                             str(ex))
    except ConnectionError as err:
        commons.error("Error connecting to playlist video list: %s" % str(err))
    return result
Exemplo n.º 2
0
def _get_channels_html(sources):
    """
    Get list of dictionary channels (user and standard) in order to display them and to list the content.
    Channel details are collected from HTML website content.

    :param sources: list of dictionary channels that contain only basic details for identification
                    (the ``type`` and ``id`` keys are read); any non-list value yields an empty result
    :return: list of improved dictionary channels holding the ``title`` and ``id`` keys
    """
    result = []
    if not isinstance(sources, list):
        return result
    for item in sources:
        url = "https://www.youtube.com/%s/%s/videos" % (item['type'],
                                                        item['id'])
        try:
            response = requests.get(url)
            if response is None or response.status_code != 200:
                continue
            formatter = BeautifulSoup(response.text, "html.parser")
            channel = dict()
            title_link = formatter.find(
                attrs={"class": "qualified-channel-title-text"}).find("a")
            channel['title'] = title_link.text
            channel['id'] = item['id']
            if item['type'] == "user":
                # For "user" sources the id is replaced by the value of the
                # subscription button's "data-channel-external-id" attribute.
                button = formatter.find(
                    attrs={
                        "class":
                        "channel-header-subscription-button-container"
                    }).find("button")
                channel['id'] = button["data-channel-external-id"]
            result.append(channel)
        except Exception as ex:
            # A failing channel is skipped with a warning. Exception (not
            # BaseException) keeps KeyboardInterrupt and SystemExit propagating.
            commons.warn("Error preparing channel dictionary: %s" %
                         str(ex))
    return result
Exemplo n.º 3
0
def _get_search_html(query):
    """
    Get the list of search results (videos, channels, playlists) using HTML website content.

    :param query: search text; it is converted with ``str`` and stripped before being sent
    :return: list of result dictionaries holding ``thumb``, ``title`` and ``url`` and, when they can be
             extracted, ``type``, ``id`` and ``time``; the list is empty when the response status is
             not 200 or a ConnectionError is caught
    """
    result = []
    try:
        query = str(query).strip()
        # Let requests build the query string so that characters such as
        # "&", "#" or "+" are percent-encoded instead of corrupting the URL.
        response = requests.get("https://www.youtube.com/results",
                                params={"search_query": query})
        if response is None or response.status_code != 200:
            return result
        formatter = BeautifulSoup(response.text, "html.parser")
        for item in formatter.find_all(attrs={"class": "yt-lockup"}):
            try:
                video = dict()
                image = item.find(attrs={"class": "video-thumb"}).find("img")
                # Prefer the "data-thumb" attribute and fall back to "src" when it is absent.
                if image.get("data-thumb") is not None:
                    video["thumb"] = image["data-thumb"]
                else:
                    video["thumb"] = image["src"]
                anchor = item.find(attrs={"class": "yt-lockup-title"}).find("a")
                video["title"] = anchor.text.strip()
                href = anchor["href"]
                video["url"] = 'https://www.youtube.com%s' % href
                # Choose the helper matching the link shape; a "/watch" link that
                # carries "list=" is treated as a playlist, not as a video.
                has_list = "list=" in href
                resolver = None
                if href.startswith("/watch") and not has_list:
                    resolver = _getvid
                elif href.startswith("/user") or href.startswith("/channel"):
                    resolver = _getcid
                elif href.startswith("/playlist") or has_list:
                    resolver = _getpid
                if resolver is not None:
                    resolved = resolver(href)
                    video["type"] = resolved[1]
                    video["id"] = resolved[0]
                duration = item.find(attrs={"class": "video-time"})
                if duration:
                    video["time"] = duration.text
                if video.get('type') == "user":
                    # Look for the subscription button inside this result first, so
                    # each user result gets its own channel id; fall back to the
                    # page-wide lookup when the result has no such button.
                    button = item.find(
                        attrs={"class": "yt-uix-subscription-button"})
                    if button is None:
                        button = formatter.find(
                            attrs={"class": "yt-uix-subscription-button"})
                    video['id'] = button["data-channel-external-id"]
                    video["type"] = "channel"
                result.append(video)
            except Exception as ex:
                # A malformed entry is skipped with a warning. Exception (not
                # BaseException) keeps KeyboardInterrupt and SystemExit propagating.
                commons.warn(
                    "Error extracting video from search list: %s" %
                    str(ex))
    except ConnectionError as err:
        commons.error("Error connecting to search video list: %s" % str(err))
    return result
Exemplo n.º 4
0
def _get_trending_html(locale='US'):
    """
    Get the list of most popular (trending) videos using HTML website content.

    :param locale: value sent as the ``gl`` query parameter of the trending page URL (default ``'US'``)
    :return: list of video dictionaries holding ``thumb``, ``id``, ``url``, ``title`` and ``time`` and,
             when the meta information is present, ``date`` and ``views``; the list is empty when the
             response status is not 200 or a ConnectionError is caught
    """
    result = []
    try:
        response = requests.get("https://www.youtube.com/feed/trending?gl=%s" %
                                locale)
        if response is None or response.status_code != 200:
            return result
        formatter = BeautifulSoup(response.text, "html.parser")
        entries = formatter.find_all(
            attrs={"class": "expanded-shelf-content-item"})
        for item in entries:
            try:
                video = dict()
                thumb_box = item.find(
                    attrs={"class": ["yt-thumb", "video-thumb"]})
                image = thumb_box.find(
                    attrs={"class": "yt-thumb-simple"}).find("img")
                # Prefer the "data-thumb" attribute and fall back to "src" when it is absent.
                if image.get("data-thumb") is not None:
                    video["thumb"] = image["data-thumb"]
                else:
                    video["thumb"] = image["src"]
                anchor = item.find(attrs={"class": "yt-lockup-title"}).find("a")
                href = anchor["href"]
                # Only the first element of the _getvid result is used here.
                video["id"] = _getvid(href)[0]
                video["url"] = 'https://www.youtube.com%s' % href
                video["title"] = anchor.text
                described = anchor.select("span.accessible-description")
                video["time"] = described[0].text if described else ""
                meta = item.find(attrs={"class": "yt-lockup-meta-info"})
                if meta and len(meta.contents) > 1:
                    video["date"] = meta.contents[0].string
                    # Keep only the text before the first space of the second child.
                    video["views"] = meta.contents[1].string.split(" ")[0]
                result.append(video)
            except Exception as ex:
                # A malformed entry is skipped with a warning. Exception (not
                # BaseException) keeps KeyboardInterrupt and SystemExit propagating.
                commons.warn(
                    "Error extracting video from trending list: %s" %
                    str(ex))
    except ConnectionError as err:
        commons.error("Error connecting to trending video list: %s" % str(err))
    return result
Exemplo n.º 5
0
def _api_item_to_video(item):
    """Convert one decoded API item into the dictionary stored in the result list."""
    url_formats = {
        "video": 'https://www.youtube.com/watch?v=%s',
        "channel": 'https://www.youtube.com/channel/%s',
        "playlist": 'https://www.youtube.com/playlist?list=%s',
    }
    # Search results wrap the identifier: nested kind -> (type, key of the id).
    search_kinds = {
        'youtube#video': ("video", 'videoId'),
        'youtube#channel': ("channel", 'channelId'),
        'youtube#playlist': ("playlist", 'playlistId'),
    }
    video = dict()

    def identify(kind, ident):
        video["id"] = ident
        video["type"] = kind
        video["url"] = url_formats[kind] % ident

    item_kind = item['kind']
    if item_kind == "youtube#video":
        identify("video", item['id'])
    elif item_kind == "youtube#searchResult":
        known = search_kinds.get(item['id']['kind'])
        if known is not None:
            identify(known[0], item['id'][known[1]])
    elif item_kind == "youtube#channel":
        identify("channel", item['id'])

    snippet = item["snippet"]
    video["title"] = snippet['title']
    if item_kind == "youtube#playlistItem" and snippet.get(
            'resourceId') is not None:
        resource = snippet['resourceId']
        if resource['kind'] == 'youtube#video':
            identify("video", resource['videoId'])

    thumbnails = snippet.get('thumbnails')
    if thumbnails is not None:
        # The first size found in this order wins.
        for size in ('standard', 'high', 'medium', 'default'):
            if thumbnails.get(size) is not None:
                video["thumb"] = thumbnails[size]['url']
                break
    if snippet.get('publishedAt') is not None:
        video["date"] = snippet['publishedAt']
    return video


def _get_datalist_api(base):
    """
    Get a list of videos, channels or playlists from a paginated JSON API listing.

    Pages are requested while the response provides a ``nextPageToken`` and the page counter has not
    passed 20; paging also stops at the first response whose status is not 200.

    :param base: request URL of the first page; ``&pageToken=<token>`` is appended for later pages
    :return: list of dictionaries holding ``title`` and, when they can be extracted, ``id``, ``type``,
             ``url``, ``thumb`` and ``date``
    """
    result = []
    try:
        pageno = 0
        token = ""
        while token is not None and pageno <= 20:
            url = base if token == "" else "%s&pageToken=%s" % (base, token)
            response = requests.get(url)
            if response is None or response.status_code != 200:
                token = None
                continue
            data = json.loads(response.text)
            token = data.get('nextPageToken')
            if token is not None:
                pageno += 1
            if data.get('items') is None:
                continue
            for item in data['items']:
                try:
                    result.append(_api_item_to_video(item))
                except Exception as ex:
                    # A malformed item is skipped with a warning. Exception (not
                    # BaseException) keeps KeyboardInterrupt and SystemExit propagating.
                    commons.warn(
                        "Error extracting video from API list: %s" %
                        str(ex))
    except ConnectionError as err:
        commons.error("Error connecting to API video list: %s" % str(err))
    return result
Exemplo n.º 6
0
 def warning(self, text):
     """Forward ``text`` unchanged to ``commons.warn``."""
     commons.warn(text)