def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    # Attempt initial connection
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed : Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Xvideos(sUrl)
    nPageStatus = page.content.status_code
    if nPageStatus != 200:
        if nPageStatus == 403:
            raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"
    print()

    for nIdx, sVideoUrl in enumerate(page.videos):
        if page.sUrlType == 'playlist':
            print(f"Processing playlist video {nIdx + 1} of {len(page.videos)} :: {sVideoUrl}")
            print()

        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title).125s.%(ext)s'
        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.download([sVideoUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
            break

        print()
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    if 'porntrex.com/video' in sUrl:
        sUrlType = 'video'
    elif 'porntrex.com/my' in sUrl:
        sUrlType = 'playlist'
    elif 'porntrex.com/search' in sUrl:
        sUrlType = 'playlist'  # Search results can be treated as a playlist
    else:
        raise ValueError(f"Unable to determine {sExtractor} URL type for {sUrl}! Please submit a bug report!")

    # Attempt initial connection
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed : Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    lUrlVideos = []
    if sUrlType == 'playlist':
        print("Playlist detected. Getting videos...")
        sUrlBaseFormat = urlBaseFormatGet(sUrl)
        nPage = 0
        while True:
            nPage += 1
            print(f"Attempting page {nPage:02}")
            if 'search' in sUrl:
                if nPage == 1:
                    sUrlPage = sUrlBaseFormat.format('')
                else:
                    sUrlPage = sUrlBaseFormat.format(f'{nPage}/')
            else:
                sUrlPage = sUrlBaseFormat.format(f'{nPage:02}')

            page = dl_common.Page(sUrlPage)
            nPageStatus = page.content.status_code
            if nPageStatus != 200:
                if nPageStatus == 403:
                    raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
                elif nPageStatus == 404:
                    print(f"Page {nPage} returned 404!")
                    print(f"Assuming page {nPage - 1} was the last page of the playlist")
                    break

            page._extract_video_urls()
            if page.videos:
                lUrlVideos += page.videos
            else:
                break

        # Remove non-video URLs that may have been picked up
        lTemp = []
        for sUrl in lUrlVideos:
            if sUrl == 'https://www.porntrex.com/my/favourites/videos/':
                continue
            if 'video' in sUrl:
                lTemp += [sUrl]
        lUrlVideos = lTemp

        nNumVideos = len(lUrlVideos)
        print(f"Found {nNumVideos} video URLs in the playlist")
        if bDebug:
            for sUrl in lUrlVideos:
                print(sUrl)

    elif sUrlType == 'video':
        lUrlVideos = [sUrl]

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"

    for nIdx, sVideoUrl in enumerate(lUrlVideos):
        if sUrlType == 'playlist':
            print(f"Processing video {nIdx + 1} of {nNumVideos}...")
            print()

        if bDebug:
            print(f"Processing {sVideoUrl}")

        video = Video(sVideoUrl)
        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{video.sFullName}'
        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.download([video.downloadUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
            break

        print()
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    # Attempt initial connection
    dl_common.randomizeHeader()
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed : Status {html.status_code}")
    print()

    sleepRandom(1, 3)

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Pornve(sUrl)
    sleepRandom(3, 5)

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = None

    for nIdx, sVideoUrl in enumerate(page.videos):
        if page.sUrlType == 'playlist':
            print(f"Processing playlist video {nIdx + 1} of {page._nVideos} :: {sVideoUrl}")
            print()

        # Get the actual video stream info for a video link from a playlist
        if page.sUrlType == 'playlist':
            pageVideo = Page_Pornve(sVideoUrl)
            sVideoName = pageVideo._sVideoName
            sVideoStreamUrl = pageVideo.videos[0]
            sPageUrl = pageVideo.url
        else:
            sVideoName = page._sVideoName
            sVideoStreamUrl = page.videos[0]
            sPageUrl = page.url

        # Skip anything already recorded in the manual archive file (sArchive)
        bRun = True
        try:
            with open(sArchive) as archive:
                if sPageUrl in archive.read():
                    print(f"Archive already has an entry for {sPageUrl}")
                    print("Skipping...")
                    bRun = False
        except FileNotFoundError:
            # No archive file yet - nothing has been downloaded so far
            pass

        if bRun:
            dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoName}.%(ext)s'
            with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
                ydl.cache.remove()
                ret = ydl.download([sVideoStreamUrl])

            # Need to do our own archiving since YTDL would record everything under the name
            # "index-v1-a1" because of how the video is extracted in _extract_video_stream.
            # YTDL ret 0 is good, 1 is bad
            if not ret:
                with open(sArchive, 'a') as archive:
                    archive.write(sPageUrl + "\r\n")

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
            break

        print()
        sleepRandom(3, 5)
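# The inline archive check/write above follows a simple two-step pattern. The helpers below are
# an illustrative sketch only (they are not part of this module); they assume the archive is a
# plain text file with one page URL per line, exactly as written by the code above.
def archiveHas(sArchivePath, sPageUrl):
    """Return True if sPageUrl is already recorded in the manual archive file."""
    try:
        with open(sArchivePath) as archive:
            return sPageUrl in archive.read()
    except FileNotFoundError:
        # No archive file yet means nothing has been downloaded so far
        return False

def archiveAdd(sArchivePath, sPageUrl):
    """Append sPageUrl to the manual archive file."""
    with open(sArchivePath, 'a') as archive:
        archive.write(sPageUrl + "\r\n")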
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    # 20210619 :: Workaround for https://github.com/ppldl/p_pl_dl/issues/1
    dl_common.addCipher("https://spankbang.com")

    # Attempt initial connection
    dl_common.randomizeHeader()
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed : Status {html.status_code}")
    print()

    sleepRandom(1, 3)

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Spankbang(sUrl)
    sleepRandom(3, 5)

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"
    # dYdlOptions['referer'] = 'https://spankbang.com'
    # dYdlOptions['user_agent'] = dl_common.dHeaders['User-Agent']  # Not needed - YTDL already has a UA randomizer

    for nIdx, sVideoUrl in enumerate(page.videos):
        if page.sUrlType == 'playlist':
            print(f"Processing playlist video {nIdx + 1} of {page._nVideos} :: {sVideoUrl}")
            print()

        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title).125s.%(ext)s'
        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.cache.remove()
            ydl.download([sVideoUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
            break

        print()
        sleepRandom()
def main(argv):
    print()

    if argv.dest is not None:
        os.chdir(argv.dest)
    print(f"Working download directory: {os.getcwd()}")
    sleep(2)
    print()

    sSourceCookies = argv.cookies
    if sSourceCookies is not None:
        print(f"Cookies source: {sSourceCookies}")
        if ".txt" in sSourceCookies:
            dl_common.parseCookieFile(sSourceCookies)
        else:
            dl_common.parseCookies(sSourceCookies)
    else:
        print("No cookies provided!")
    sleep(0.5)
    print()

    sSourceUrls = argv.input
    print(f"Using the following input source: {sSourceUrls}")
    print()
    sleep(0.5)

    dSites = {
        'lewdthots': False,
        'pornhub': False,
        'porntrex': False,
        'pornve': False,
        'spankbang': False,
        'xhamster': False,
        'xvideos': False,
        'youporn': False,
    }

    dExtractors = {
        'lewdthots': dl_lt,
        'pornhub': dl_ph,
        'porntrex': dl_pt,
        'pornve': dl_pornve,
        'spankbang': dl_sb,
        'xhamster': dl_xh,
        'xvideos': dl_xv,
    }

    # Get each URL into a dict
    dUrlDefs = {}
    with open(sSourceUrls) as fSourceUrls:
        sLines = fSourceUrls.readlines()
        for sLine in sLines:
            sUrl = sLine.strip()
            print(f"URL: {sUrl}")
            for sSite in dSites.keys():
                if sSite in sLine:
                    dSites[sSite] = True
                    dUrlDefs[sUrl] = sSite

    print()
    print("Detected websites:")
    print(json.dumps(dSites, indent=4))
    print()
    sleep(2)

    if argv.only is not None:
        argv.only = argv.only.lower()
        if argv.only in dSites.keys():
            for key, value in dSites.items():
                if argv.only == key:
                    dSites[key] = True
                else:
                    dSites[key] = False

    for sUrl, sSite in dUrlDefs.items():
        if sSite in dExtractors.keys() and dSites[sSite]:
            try:
                # Cookies should already be parsed and available when going through main
                dExtractors[sSite].run(sUrl, sCookieSource=None)
            except Exception:
                print("\r\n\r\n")
                traceback.print_exc()
                print("\r\n\r\n")
                continue
        else:
            print(f"No extractor available for {sSite} - {sUrl}")
        sleep(0.5)
        print()
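# main() only relies on four attributes of the parsed arguments: dest, cookies, input, and only.
# The entry point below is a minimal sketch of a compatible CLI; the flag names and help strings
# are assumptions and may differ from the project's actual argument parser.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description="Run the site extractors against a list of URLs")
    parser.add_argument('-i', '--input', required=True, help="Text file containing one URL per line")
    parser.add_argument('-c', '--cookies', default=None, help="Cookie file (.txt) or raw cookie string")
    parser.add_argument('-d', '--dest', default=None, help="Working/download directory")
    parser.add_argument('-o', '--only', default=None, help="Restrict processing to a single site key, e.g. 'xvideos'")
    main(parser.parse_args())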
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    # Attempt initial connection
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed : Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Pornhub(sUrl)

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"

    # Set options helpful for pornhub
    # dYdlOptions['retries'] = 10
    # dYdlOptions['fragment_retries'] = 10
    # dYdlOptions['keep_fragments'] = True
    # dYdlOptions['skip_unavailable_fragments'] = False
    # dYdlOptions['external_downloader_args'] = ["-m3u8_hold_counters", "3", "-max_reload", "3"]

    lFailedUrls = []

    def ytdlLoop(lUrls, bLogFailures):
        nonlocal lFailedUrls
        for nIdx, sVideoUrl in enumerate(lUrls):
            print(f"Processing video {nIdx + 1} of {len(lUrls)} :: {sVideoUrl}")
            print()

            sVideoId = sVideoUrl.split('view_video.php?viewkey=')[-1]
            dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoId}_%(title).125s.mp4'

            nStart = time()
            try:
                with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
                    ydl.download([sVideoUrl])
            except Exception:
                if bLogFailures:
                    print(f"\r\nEncountered some error for URL = {sVideoUrl}")
                    print("Adding it to the retry list...")
                    lFailedUrls += [sVideoUrl]
                continue
            nStop = time()
            print(f"\r\nElapsed time for URL = {sVideoUrl}: {round((nStop - nStart) / 60, 2)} minutes\r\n")

            if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
                print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
                break

            print()

    ytdlLoop(page.videos, bLogFailures=True)

    if lFailedUrls:
        print("Retrying URLs that failed...")
        for sUrl in lFailedUrls:
            print(sUrl)
        ytdlLoop(lFailedUrls, bLogFailures=False)