def test_construct():
    """A playlist built from ``short_test_pl`` yields the canonical playlist URL."""
    playlist = Playlist(short_test_pl)
    wanted_url = (
        'https://www.youtube.com/'
        'playlist?list='
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw'
    )
    assert playlist.construct_playlist_url() == wanted_url
def parse_all_links(Playlist):
    """Collect the watch links of every video listed in a playlist.

    The playlist page is fetched first and scanned line by line; any further
    batches are then pulled through the "load more" URL until none is left.

    Args:
        Playlist: Object providing ``construct_playlist_url()`` and
            ``_load_more_url(html)``.

    Returns:
        A list of link strings, first-page links followed by the links of
        each "load more" batch in the order they were fetched.
    """
    page_source = request.get(Playlist.construct_playlist_url())

    # First page: keep only the lines carrying the video-tile link class and
    # cut the href value out of each, dropping everything from the first '&'.
    link_list = []
    for line in page_source.split('\n'):
        if 'yt-uix-sessionlink yt-uix-tile-link' in line:
            link_list.append(line.split('href="', 1)[1].split('&', 1)[0])

    # The first page only carries the first batch of links (100 or fewer);
    # the rest is obtained by requesting the "load more" URL the way a
    # browser would.
    next_url = Playlist._load_more_url(page_source)
    while len(next_url) > 0:  # an empty value means there is nothing more
        payload = json.loads(request.get(next_url))
        hrefs = re.findall(
            r'href=\"(/watch\?v=[\w-]*)',
            payload['content_html'],
        )
        # Drop duplicates within this batch while keeping first-seen order.
        link_list.extend(OrderedDict.fromkeys(hrefs))
        next_url = Playlist._load_more_url(payload['load_more_widget_html'])

    return link_list
def test_construct():
    """A watch URL carrying a ``list=`` parameter yields the canonical playlist URL."""
    watch_url = (
        'https://www.youtube.com/watch?v=m5q2GCsteQs&list='
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw'
    )
    wanted_url = (
        'https://www.youtube.com/'
        'playlist?list='
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw'
    )
    playlist = Playlist(watch_url)
    assert playlist.construct_playlist_url() == wanted_url
def downloadList(url, maxCount=None, start=None, end=None):
    """Download the videos of a YouTube playlist using several task buckets.

    The playlist's video URLs are optionally narrowed down, distributed
    round-robin over ``DOWNLOAD_TASK_CUNT`` buckets and handed to
    ``downloadListMultipleThread``. The download step is repeated for as long
    as ``hasToDownloadTask()`` reports outstanding work.

    Args:
        url: Playlist URL passed to ``Playlist``.
        maxCount: If truthy, keep only the first ``maxCount`` videos. Takes
            precedence over ``start`` and ``end``.
        start: 1-based position of the first video to keep (inclusive).
        end: 1-based position of the last video to keep (inclusive).
    """
    print("download Youtube playlist:%s, maxCount:%s" % (url, str(maxCount)))
    taskCount = DOWNLOAD_TASK_CUNT

    pl = Playlist(url)
    pl.populate_video_urls()
    videoUrls = pl.video_urls

    # Narrow the selection; ``start``/``end`` are 1-based, hence ``start - 1``.
    if maxCount:
        videoUrls = videoUrls[:maxCount]
    elif start and end:
        videoUrls = videoUrls[start - 1:end]
    elif start and end is None:
        videoUrls = videoUrls[start - 1:]
    elif start is None and end:
        videoUrls = videoUrls[:end]

    prefix_gen = pl._path_num_prefix_generator()
    playlistTitle = getPlaylistTitle(pl.construct_playlist_url())

    # One argument bucket per task; links are dealt out round-robin and each
    # link starts out marked as not yet downloaded.
    argsArrayList = [[] for _ in range(taskCount)]
    for i, link in enumerate(videoUrls):
        prefix = next(prefix_gen)
        argsArrayList[i % taskCount].append((link, prefix, playlistTitle))
        s_linkStatusDic[link] = False

    downloadListMultipleThread(argsArrayList)

    # NOTE(review): every retry re-submits the complete bucket list and the
    # loop has no attempt limit; presumably downloadListMultipleThread skips
    # links that already succeeded -- confirm against its implementation.
    times = 1
    while hasToDownloadTask():
        times += 1
        toDownloadFileDic = {
            k: v for k, v in s_linkStatusDic.items() if not v
        }
        print("=>try %d times, file to download count: %d"
              % (times, len(toDownloadFileDic)))
        # Interpolate with ``%``: passing the value as a second print()
        # argument would emit the literal "%s" placeholder.
        print(" %s" % str(toDownloadFileDic))
        downloadListMultipleThread(argsArrayList)

    print("all download task done.")
def from_playlist_url(url):
    """Describe the playlist at ``url`` as a plain dictionary.

    Args:
        url: URL passed to ``Playlist``.

    Returns:
        A dict with the keys ``type`` (always ``'playlist'``), ``title``,
        ``url`` (the constructed playlist URL), ``playlist_id``,
        ``video_urls`` (the result of ``parse_links()``) and ``video_ids``
        (the text after the first ``=`` of each video URL).
    """
    playlist = Playlist(url)
    # NOTE(review): the result of this first parse_links() call is discarded
    # and the method is called again below; it looks redundant but is kept in
    # case it has side effects -- confirm against Playlist.
    playlist.parse_links()
    playlist.populate_video_urls()

    title = playlist.title()
    playlist_url = playlist.construct_playlist_url()
    playlist_id = _get_playlist_id(playlist_url)
    links = playlist.parse_links()

    return {
        'type': 'playlist',
        'title': title,
        'url': playlist_url,
        'playlist_id': playlist_id,
        'video_urls': links,
        'video_ids': [link.split('=')[1] for link in links],
    }