# Assumed imports (hedged): the helpers below are used but not defined in this
# snippet; the module paths follow the surrounding project's conventions and
# are not confirmed here.
# import downloader, utils
# import ree as re                     # project wrapper exposing re.find()
# from urllib.parse import urljoin
# from utils import Soup, Session, get_print, get_max_range, filter_range


def get_videos(url, type='video', only_mp4=False, audio_included=False,
               max_res=None, max_abr=None, cw=None):
    info = {}
    n = get_max_range(cw)

    if '/channel/' in url or '/user/' in url or '/c/' in url:
        # Channel page: enumerate uploads
        info = read_channel(url, n=n, cw=cw)
        info['type'] = 'channel'
        info['title'] = u'[Channel] {}'.format(info['uploader'])
        if cw:
            info['urls'] = filter_range(info['urls'], cw.range)
    elif '/playlist' in url:
        # Playlist page: enumerate items
        info = read_playlist(url, n=n, cw=cw)
        info['type'] = 'playlist'
        info['title'] = u'[Playlist] {}'.format(info['title'])
        if cw:
            info['urls'] = filter_range(info['urls'], cw.range)
    else:
        # Anything else is treated as a single video URL
        info['type'] = 'single'
        info['urls'] = [url]

    info['videos'] = [Video(url, type, only_mp4, audio_included, max_res, max_abr, cw)
                      for url in info['urls']]

    return info
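
# --- Usage sketch (added; not part of the original source) ---
# Illustrates the channel/playlist/single routing above. `get_videos` and
# `Video` are project-internal, network access is required, and the URL is a
# placeholder: the '/c/' pattern suggests a YouTube-style channel URL, but the
# snippet never names the site.
def _demo_channel_download():
    info = get_videos('https://www.youtube.com/c/EXAMPLE', max_res=1080)
    print(info['type'], info['title'])  # 'channel', '[Channel] <uploader>'
    print(len(info['videos']))          # one Video object per collected URL
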
# Pornhub listing crawler; presumably from a different extractor module than
# the get_videos above, since the two names collide.
def get_videos(url, cw=None):
    '''
    get_videos
    '''
    print_ = get_print(cw)

    # Work out the listing mode and the username / playlist id from the URL
    if '/users/' in url:
        mode = 'users'
        username = url.split('/users/')[1].split('/')[0]
    elif '/pornstar/' in url:
        mode = 'pornstar'
        username = url.split('/pornstar/')[1].split('/')[0]
    elif '/model/' in url:
        mode = 'model'
        username = url.split('/model/')[1].split('/')[0]
    elif '/channels/' in url:
        mode = 'channels'
        username = url.split('/channels/')[1].split('/')[0]
    elif '/playlist/' in url:
        mode = 'playlist'
        username = url.split('/playlist/')[1].split('/')[0]
    else:
        raise Exception('Not supported url')
    username = username.split('?')[0].split('#')[0]

    session = Session()
    domain = utils.domain(url)

    if mode in ['pornstar']:
        # Pornstars may expose a free "uploads" tab; its presence decides
        # which listing endpoint is used below.
        url_main = 'https://{}/{}/{}'.format(domain, mode, username)
        html = downloader.read_html(url_main, session=session)
        soup = Soup(html)
        soup = fix_soup(soup, url_main, session, cw)
        for a in soup.findAll('a'):
            if '/{}/{}/videos/upload'.format(mode, username) in a.attrs.get('href', ''):
                free = True
                break
        else:  # for/else: no upload tab found
            free = False
        print_('free: {}'.format(free))

    # Range
    max_pid = get_max_range(cw, 500)
    max_pid = min(max_pid, 2000)

    html = downloader.read_html(url, session=session)
    soup = fix_soup(Soup(html), url, session, cw)
    info = {}

    # get title
    h1 = soup.find('h1')
    if h1:
        header = 'Playlist'
        title = h1.find(id='watchPlaylist')
    else:
        title = None
    if not title:
        header = 'Channel'
        profile = soup.find('div', class_='profileUserName')
        wrapper = soup.find('div', class_='titleWrapper')
        bio = soup.find('div', class_='withBio')
        title = soup.find('h1', {'itemprop': 'name'})
        if not title and profile:
            title = profile.a
        if not title and wrapper:
            title = wrapper.h1
        if not title and bio:
            title = bio.h1
        if not title:
            raise Exception('No title')
    #print(title)
    info['title'] = '[{}] {}'.format(header, title.text.strip())

    token = re.find('''token *= *['"](.*?)['"]''', html)
    print_('token: {}'.format(token))

    # get links
    hrefs = []
    fail = 0
    for p in range(1, 1 + 100):
        try:
            if mode in ['users', 'model']:
                if mode == 'users':
                    url_api = ('https://{}/users/{}/videos/public/'
                               'ajax?o=mr&page={}'.format(domain, username, p))
                elif mode == 'model':
                    url_api = ('https://{}/model/{}/videos/upload/'
                               'ajax?o=mr&page={}'.format(domain, username, p))
                r = session.post(url_api)
                soup = Soup(r.text)
                if soup.find('h1'):
                    print('break: h1')
                    break
            elif mode in ['pornstar']:
                if free:
                    url_api = ('https://{}/{}/{}/videos/upload'
                               '?page={}'.format(domain, mode, username, p))
                    soup = downloader.read_soup(url_api, session=session)
                    soup = fix_soup(soup, url_api, session, cw)
                    soup = soup.find('div', class_='videoUList')
                else:
                    url_api = 'https://{}/{}/{}?page={}'.format(domain, mode, username, p)
                    soup = downloader.read_soup(url_api, session=session)
                    soup = fix_soup(soup, url_api, session, cw)
                    soup = soup.find('ul', class_='pornstarsVideos')
            elif mode in ['channels']:
                url_api = 'https://{}/{}/{}/videos?page={}'.format(domain, mode, username, p)
                soup = downloader.read_soup(url_api, session=session)
                soup = fix_soup(soup, url_api, session, cw)
                try:
                    soup = soup.find('div', {'id': 'channelsBody'}).find('div', class_='rightSide')
                except Exception:  # container missing: no more pages
                    break
            elif mode in ['playlist']:
                #url_api = 'https://{}/playlist/viewChunked?id={}&offset={}&itemsPerPage=40'.format(domain, username, len(hrefs))
                if token is None:
                    raise Exception('no token')
                url_api = 'https://{}/playlist/viewChunked?id={}&token={}&page={}'.format(domain, username, token, p)
                soup = downloader.read_soup(url_api, session=session)
            else:
                raise NotImplementedError(mode)
            fail = 0
        except Exception as e:
            print_(e)
            fail += 1
            if fail < 2:
                continue  # tolerate a single consecutive failure
            else:
                break
        finally:
            print_('{} ({})'.format(url_api, len(hrefs)))
        if cw and not cw.alive:
            return

        lis = soup.findAll('li', class_='videoblock')
        if not lis:
            print_('break: no lis')
            break
        if getattr(soup.find('title'), 'text', '').strip() == 'Page Not Found':
            print_('Page Not Found')
            break

        c = 0
        for li in lis:
            a = li.find('a')
            href = a.attrs['href']
            href = urljoin(url, href)
            if href in hrefs:
                continue
            c += 1
            if href.startswith('javascript:'):  # Remove Pornhub Premium
                print(href)
                continue
            hrefs.append(href)
        if c == 0:
            print('c==0')
            break
        print(c)  # 1320

        if len(hrefs) >= max_pid:
            break

    if cw:
        hrefs = filter_range(hrefs, cw.range)

    info['hrefs'] = hrefs
    return info
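
# --- Usage sketch (added; not part of the original source) ---
# Illustrates consuming the Pornhub crawler above. All helpers are
# project-internal, network access is required, and the username in the URL
# is a placeholder.
def _demo_pornhub_model():
    info = get_videos('https://www.pornhub.com/model/EXAMPLE')
    print(info['title'])            # '[Channel] <model name>'
    for href in info['hrefs'][:5]:
        print(href)                 # first few collected video page URLs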