def parse_others(self, soup: BeautifulSoup, url: URL):
    """Add one thumb per category found on a categories page.

    Every ``a.item`` inside each ``div.list-categories`` is collected into a
    mapping keyed by the image ``alt`` text, so a repeated description keeps
    only the last entry seen. Thumbs are then added in sorted description
    order, labelled with the ``div.videos`` text (top right) and the
    description (bottom center).
    """
    found = {}
    for listing in _iter(soup.find_all('div', {'class': 'list-categories'})):
        for item in _iter(listing.find_all('a', {'class': 'item'})):
            target = URL(item.attrs['href'], base_url=url)
            title = item.img.attrs['alt']
            image = URL(item.img.attrs['src'], base_url=url)
            counter = item.find('div', {'class': 'videos'})
            if counter is None:
                video_count = ''
            else:
                video_count = str(counter.string).strip()
            found[title] = (target, image, video_count)

    # Sort on the description only; the stored tuples are never compared.
    for title, entry in sorted(found.items(), key=lambda pair: pair[0]):
        target, image, video_count = entry
        labels = [
            {'text': video_count, 'align': 'top right'},
            {'text': title, 'align': 'bottom center'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=title,
                       labels=labels)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for every ``div.video`` inside the listing boxes.

    Boxes are ``div`` elements matching the ``box boxTL`` / ``box boxTR``
    class filter whose ``id`` is not ``vPromo``. Each thumb carries the text
    of its ``<b>`` element (top right), the image ``alt`` text (bottom
    center) and an ``HD`` marker (top left, bold) when a ``div.hSpriteHD``
    is present.
    """
    box_filter = {
        'class': ['box boxTL', 'box boxTR'],
        'id': lambda x: x != 'vPromo',
    }
    for box in _iter(soup.find_all('div', box_filter)):
        for video in _iter(box.find_all('div', {'class': 'video'})):
            target = URL(video.a.attrs['href'], base_url=url)
            title = video.a.img.attrs['alt']
            image = URL(video.img.attrs['src'], base_url=url)

            length_tag = video.find('b')
            length = str(length_tag.string) if length_tag is not None else ''

            hd_tag = video.find('div', {'class': "hSpriteHD"})
            quality = 'HD' if hd_tag is not None else ''

            labels = [
                {'text': length, 'align': 'top right'},
                {'text': title, 'align': 'bottom center'},
                {'text': quality, 'align': 'top left', 'bold': True},
            ]
            self.add_thumb(thumb_url=image, href=target, popup=title,
                           labels=labels)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for each ``li`` of every ``ul.thumbs`` that has an ``a.thumb``.

    The image URL comes from ``data-original`` when present, otherwise from
    ``src``. The ``alt`` text (default empty) is used as popup and bottom
    label; the stripped ``<time>`` text, if any, goes top right.
    """
    for listing in _iter(soup.find_all('ul', {'class': 'thumbs'})):
        for entry in _iter(listing.find_all('li')):
            link = entry.find('a', {'class': 'thumb'})
            if not link:
                continue

            target = URL(link.attrs['href'], base_url=url)
            image_attrs = entry.img.attrs
            if 'data-original' in image_attrs:
                source = image_attrs['data-original']
            else:
                source = image_attrs.get('src')
            image = URL(source, base_url=url)
            caption = image_attrs.get('alt', '')

            time_tag = entry.find('time')
            if time_tag is None:
                length = ''
            else:
                length = str(time_tag.string).strip()

            labels = [
                {'text': length, 'align': 'top right'},
                {'text': caption, 'align': 'bottom center'},
            ]
            self.add_thumb(thumb_url=image, href=target, popup=caption,
                           labels=labels)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for each ``li.thumb`` found inside ``div.content`` blocks.

    The description is the ``span.thumb-title`` text and the top-right label
    is the text of the first ``span`` matching ``duration`` or ``counter``.
    Entries whose top-right text is exactly ``Link`` are skipped.
    """
    for block in _iter(soup.find_all('div', {'class': 'content'})):
        for entry in _iter(block.find_all('li', {'class': 'thumb'})):
            target = URL(entry.a.attrs['href'], base_url=url)

            title_tag = entry.find('span', {'class': 'thumb-title'})
            title = str(title_tag.string) if title_tag is not None else ''

            image = URL(entry.img.attrs['data-original'], base_url=url)

            badge = entry.find('span', {'class': ['duration', 'counter']})
            badge_text = str(badge.string) if badge is not None else ''
            if badge_text == 'Link':
                continue

            labels = [
                {'text': badge_text, 'align': 'top right'},
                {'text': title, 'align': 'bottom center'},
            ]
            self.add_thumb(thumb_url=image, href=target, popup=title,
                           labels=labels)
def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Add a tag for every ``/categories/`` link inside ``div.sidebar`` blocks.

    The tag name is the string form of the anchor's first child node.
    """
    def is_category_link(value):
        # str() keeps the test safe for anchors that carry no href at all.
        return '/categories/' in str(value)

    for sidebar in _iter(soup.find_all('div', {'class': 'sidebar'})):
        for anchor in _iter(sidebar.find_all('a', href=is_category_link)):
            name = str(anchor.contents[0])
            target = URL(anchor.attrs['href'], base_url=url)
            self.add_tag(name, target)
def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Add a tag for every link in the category listing ``ul`` elements.

    Listings are ``ul`` elements with class ``categories-listing`` or
    ``categories-popular-listing``; the tag name is the anchor's ``title``.
    """
    listing_classes = ['categories-listing', 'categories-popular-listing']
    listings = _iter(soup.find_all('ul', {'class': listing_classes}))
    for listing in listings:
        for anchor in _iter(listing.find_all('a')):
            name = str(anchor.attrs['title'])
            target = URL(anchor.attrs['href'], base_url=url)
            self.add_tag(name, target)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add the post author (blue) and the popular-block links as tags.

    The author tag comes from ``div.post_author_name``; the remaining tags
    are the anchors of every ``div.popular_block_header_rl``, named after
    the stripped text of their ``<b>`` child.
    """
    author_box = soup.find('div', {'class': 'post_author_name'})
    if author_box:
        author_name = str(author_box.span.string)
        author_page = URL(author_box.a.attrs['href'], base_url=url)
        self.add_tag(author_name, author_page, style={'color': 'blue'})

    headers = soup.find_all('div', {'class': 'popular_block_header_rl'})
    for header in _iter(headers):
        for anchor in _iter(header.find_all('a')):
            name = str(anchor.b.string).strip()
            target = URL(anchor.attrs['href'], base_url=url)
            self.add_tag(name, target)
def parse_others(self, soup: BeautifulSoup, url: URL):
    """Add thumbs for a tag page or, failing that, a pornstar list page.

    When a ``div.tag-150-container`` exists, each of its ``li`` items with a
    link becomes a thumb labelled with ``collect_string`` of the item, and
    nothing else is parsed. Otherwise the ``li`` items of
    ``ul.pornstar-tag-list`` are used, labelled with the image ``alt`` text
    and the stripped ``div.video-count`` text (top right).
    """
    def link_and_image(entry):
        # Shared by both page layouts: returns None for items without a
        # link, otherwise (target URL, thumbnail URL, <img> tag).
        link = entry.find('a', href=True)
        if not link:
            return None
        target = URL(link.attrs['href'], base_url=url)
        image_tag = entry.img
        source = image_tag.attrs.get('data-original',
                                     image_tag.attrs.get('src'))
        return target, URL(source, base_url=url), image_tag

    tag_box = soup.find('div', {'class': 'tag-150-container'})
    if tag_box:
        for entry in _iter(tag_box.find_all('li')):
            parts = link_and_image(entry)
            if parts is None:
                continue
            target, image, _ = parts
            caption = collect_string(entry)
            self.add_thumb(thumb_url=image, href=target, popup=caption,
                           labels=[{'text': caption,
                                    'align': 'bottom center'}])
        return

    star_list = soup.find('ul', {'class': 'pornstar-tag-list'})
    if not star_list:
        return
    for entry in _iter(star_list.find_all('li')):
        parts = link_and_image(entry)
        if parts is None:
            continue
        target, image, image_tag = parts
        caption = image_tag.attrs.get('alt', '')
        count_tag = entry.find('div', {'class': 'video-count'})
        if count_tag is None:
            count_text = ''
        else:
            count_text = str(count_tag.string).strip()
        labels = [
            {'text': count_text, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=caption,
                       labels=labels)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add tags from the links found inside ``div.info_holder`` blocks.

    Profile links are added in blue with ``.html`` replaced by ``/videos/``.
    Category, search and channel links are added as plain tags. The two
    checks are independent, so one link can yield both tags.
    """
    plain_markers = ('/category/', '/search-p**n/', '/channel/')
    for holder in _iter(soup.find_all('div', {'class': 'info_holder'})):
        for anchor in _iter(holder.find_all('a', href=True)):
            target = str(anchor.attrs['href'])
            if '/profiles/' in target:
                videos_page = URL(target.replace('.html', '/videos/'))
                self.add_tag(anchor.string, videos_page,
                             style=dict(color='blue'))
            if any(marker in target for marker in plain_markers):
                self.add_tag(anchor.string, URL(target))
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add actor links (blue) and category links as tags.

    Actors are the anchors inside ``div#video-actors``; categories are the
    anchors inside ``div#cat-list``. Both are named after the anchor's
    ``title`` attribute.
    """
    # NOTE(review): psp() is called live here but only appears commented out
    # in the sibling parsers - presumably leftover debugging; confirm.
    for actors in _iter(soup.find_all('div', {'id': 'video-actors'})):
        for anchor in _iter(actors.find_all('a')):
            psp(anchor)
            name = str(anchor.attrs['title'])
            target = URL(anchor.attrs['href'], base_url=url)
            self.add_tag(name, target, style={'color': 'blue'})

    for categories in _iter(soup.find_all('div', {'id': 'cat-list'})):
        for anchor in _iter(categories.find_all('a')):
            psp(anchor)
            name = str(anchor.attrs['title'])
            target = URL(anchor.attrs['href'], base_url=url)
            self.add_tag(name, target)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add a tag for every link in the ``div.values`` rows of ``div.extra-detail``.

    Links whose URL contains ``/pornstar/`` are styled red; all others get
    no style. Pages without a ``div.extra-detail`` block add no tags.
    """
    container = soup.find('div', {'class': 'extra-detail'})
    if container is None:
        # find() returns None when the block is missing; without this guard
        # the find_all() below raised AttributeError on such pages.
        return

    for values in _iter(container.find_all('div', {'class': 'values'})):
        for xref in _iter(values.find_all('a')):
            href = URL(xref.attrs['href'], base_url=url)
            style = dict(color='red') if href.contain('/pornstar/') else None
            self.add_tag(str(xref.string), href, style=style)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add the uploader (blue) and the ``/categories/`` links as tags.

    The uploader comes from the link inside ``div.username``; its tag points
    at the profile URL with ``videos/`` appended. Category tags are the
    ``/categories/`` anchors with a single text child inside ``div.info``
    blocks. A page without the uploader block (or without a link in it)
    still gets its category tags.
    """
    # adding user to video
    user_container = soup.find('div', {'class': 'username'})
    # Guard both lookups: find() and .a return None when the element is
    # missing, which previously raised AttributeError and lost all tags.
    if user_container is not None and user_container.a is not None:
        user_link = user_container.a
        href = URL(user_link.attrs['href'] + 'videos/', base_url=url)
        username = str(user_link.string).strip()
        self.add_tag(username, href, style=dict(color='blue'))

    # adding tags to video
    for item in _iter(soup.find_all('div', {'class': 'info'})):
        for anchor in _iter(
                item.find_all('a', href=lambda x: '/categories/' in str(x))):
            if anchor.string is not None:
                self.add_tag(str(anchor.string),
                             URL(anchor.attrs['href'], base_url=url))
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add uploader shortcuts (blue) and the media tags of a video page.

    The username is the last path segment of the first link inside
    ``div.thumb-member-username``; it yields an "uploads" and a "gals" tag.
    Media tags are the anchors with a single text child inside
    ``div#media-tags-container``.
    """
    # adding "user" to video
    member = soup.find('div', {'class': 'thumb-member-username'})
    if member is not None:
        profile = member.find('a').attrs['href']
        _, _, username = profile.rpartition('/')
        uploads = URL('http://motherless.com/u/' + username + '*')
        self.add_tag(username + ' uploads', uploads,
                     style=dict(color='blue'))
        galleries = URL('http://motherless.com/galleries/member/'
                        + username + '*')
        self.add_tag(username + ' gals', galleries,
                     style=dict(color='blue'))

    # adding tags to video
    for box in _iter(soup.find_all('div', {'id': 'media-tags-container'})):
        for anchor in _iter(box.find_all('a')):
            if anchor.string is None:
                continue
            self.add_tag(str(anchor.string),
                         URL(anchor.attrs['href'], base_url=url))
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add the uploader (blue) and the tag links of a video page.

    The uploader label is the first two stripped strings of
    ``div.pull-left.user-container`` joined by a space, and the tag points
    at the first link in that block whose ``href`` contains no ``#``, with
    ``/videos`` appended. The uploader tag is skipped when the block has
    fewer than two strings or no such link. Remaining tags are the anchors
    with a single text child inside ``div.m-t-10.overflow-hidden`` blocks.
    """
    user = soup.find('div', {'class': 'pull-left user-container'})
    if user is not None:
        user_strings = list(user.stripped_strings)
        # Beautiful Soup calls an attribute filter with None for anchors
        # that have no href; the bare "'#' not in x" raised TypeError there.
        profile = user.find(
            'a', href=lambda x: x is not None and '#' not in x)
        if len(user_strings) >= 2 and profile is not None:
            label = '{0} {1}'.format(user_strings[0], user_strings[1])
            self.add_tag(label,
                         URL(profile.attrs['href'] + '/videos',
                             base_url=url),
                         style={'color': 'blue'})

    for tag_container in _iter(
            soup.find_all('div', {'class': 'm-t-10 overflow-hidden'})):
        for anchor in _iter(tag_container.find_all('a')):
            if anchor.string is not None:
                self.add_tag(str(anchor.string),
                             URL(anchor.attrs['href'], base_url=url))
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for every non-private ``div.video-item``.

    Items containing a ``span.ico-private`` are skipped. Labels are the
    stripped last child of ``div.durations`` (top right), the image ``alt``
    text (bottom center) and the stripped ``div.hd-text-icon`` text (top
    left, bold).
    """
    for item in _iter(soup.find_all('div', {'class': 'video-item'})):
        if item.find('span', {'class': 'ico-private'}):
            continue

        target = URL(item.a.attrs['href'], base_url=url)
        title = item.img.attrs['alt']
        image = URL(item.img.attrs['data-original'], base_url=url)

        length_tag = item.find('div', {'class': 'durations'})
        if length_tag is None:
            length = ''
        else:
            length = str(length_tag.contents[-1]).strip()

        hd_tag = item.find('div', {'class': 'hd-text-icon'})
        quality = str(hd_tag.string).strip() if hd_tag is not None else ''

        labels = [
            {'text': length, 'align': 'top right'},
            {'text': title, 'align': 'bottom center'},
            {'text': quality, 'align': 'top left', 'bold': True},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=title,
                       labels=labels)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add a tag for every ``/videotag/`` link inside ``div#info``.

    The tag label is the anchor's ``.string``. Pages without ``div#info``
    add no tags, and anchors without an ``href`` are ignored.
    """
    info = soup.find('div', {'id': 'info'})
    if info is None:
        # find() returns None when the block is missing; the unguarded
        # find_all() raised AttributeError on such pages.
        return

    # href=True skips anchors lacking the attribute, which previously
    # raised KeyError on the attrs lookup.
    for anchor in _iter(info.find_all('a', href=True)):
        tag_href = anchor.attrs['href']
        if '/videotag/' in tag_href:
            self.add_tag(anchor.string, URL(tag_href))
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for each ``div.content`` inside the first ``div.videos``.

    Labels are the ``div.duration`` text and the ``<em>`` text (both top
    right), the image ``alt`` text (bottom center) and ``HD`` (top left)
    when a ``span.hd_video`` is present.
    """
    listing = soup.find('div', {'class': 'videos'})
    if not listing:
        return

    for entry in _iter(listing.find_all('div', {'class': 'content'})):
        link = entry.find('a', href=True)
        target = URL(link.attrs['href'], base_url=url)
        image = URL(entry.img.attrs['src'], base_url=url)
        caption = entry.img.attrs.get('alt', '')

        length_tag = entry.find('div', {'class': 'duration'})
        length = str(length_tag.string) if length_tag is not None else ''

        quality = 'HD' if entry.find('span', {'class': 'hd_video'}) else ''

        count_tag = entry.find('em')
        count = str(count_tag.string) if count_tag is not None else ''

        labels = [
            {'text': length, 'align': 'top right'},
            {'text': count, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
            {'text': quality, 'align': 'top left'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=caption,
                       labels=labels)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for each thumb block inside ``div.mozaique``.

    The image URL is cut out of the inline ``<script>`` whose text contains
    ``img src``. The link ``title`` (default empty) is the popup and bottom
    label; the ``span.duration`` text with surrounding parentheses stripped
    goes top right.
    """
    mosaic = soup.find('div', {'class': 'mozaique'})
    if not mosaic:
        return

    # NOTE(review): the class filter keeps its trailing space on purpose -
    # whether it matches depends on how bs4 splits the class attribute.
    for block in _iter(mosaic.find_all('div', {'class': 'thumb-block '})):
        link = block.find('a')
        if not link:
            continue

        target = URL(link.attrs['href'], base_url=url)
        script = block.find('script', text=lambda x: 'img src' in str(x))
        source = quotes(script.text, '<img src="', '"')
        image = URL(source, base_url=url)
        caption = link.attrs.get('title', '')

        length_tag = block.find('span', {'class': 'duration'})
        if length_tag:
            length = str(length_tag.string).strip('()')
        else:
            length = ''

        labels = [
            {'text': length, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=caption,
                       labels=labels)
def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Add a page for every numeric link in the pagination container.

    The container comes from ``self.get_pagination_container``. Only anchors
    with an ``href`` whose text, once stripped, is all digits are added.
    """
    container = self.get_pagination_container(soup)
    if not container:
        return

    for anchor in _iter(container.find_all('a', {'href': True})):
        caption = anchor.string
        if not caption:
            continue
        if str(caption).strip().isdigit():
            self.add_page(caption, URL(anchor.attrs['href'], base_url=url))
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for every ``div.video`` on the page.

    Labels are the ``span.video-overlay`` text (top right), the image
    ``alt`` text (bottom center) and ``HD`` (top left) when a
    ``span.hdmovie-icon`` is present.
    """
    for video in _iter(soup.find_all('div', {'class': 'video'})):
        target = URL(video.a.attrs['href'], base_url=url)
        image = URL(video.img.attrs['src'], base_url=url)
        caption = video.img.attrs.get('alt', '')

        overlay = video.find('span', {'class': 'video-overlay'})
        length = str(overlay.string) if overlay is not None else ''

        quality = 'HD' if video.find('span', {'class': 'hdmovie-icon'}) else ''

        labels = [
            {'text': length, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
            {'text': quality, 'align': 'top left'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=caption,
                       labels=labels)
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Add every ``<source>`` of the ``video.video-js`` player as a video.

    Only sources whose ``src`` contains ``http`` are added; each is named
    after its ``label`` attribute.
    """
    player = soup.find('video', {'class': 'video-js'})
    if player is None:
        return

    for source in _iter(player.find_all('source')):
        if 'http' not in source.attrs.get('src', ''):
            continue
        self.add_video(source.attrs['label'], URL(source.attrs['src']))
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add the uploader (blue) and the content tags of a video page.

    The uploader comes from ``div.user-card``: its first link is the target
    and its ``span.name`` text is the label. Content tags are the anchors
    with a single text child inside ``div.content-tags`` blocks.
    """
    # adding "user" to video
    card = soup.find('div', {'class': 'user-card'})
    if card is not None:
        profile = card.find('a').attrs['href']
        name = card.find('span', {'class': 'name'}).string
        self.add_tag(name, URL(profile, base_url=url),
                     style=dict(color='blue'))

    # adding tags to video
    for box in _iter(soup.find_all('div', {'class': 'content-tags'})):
        for anchor in _iter(box.find_all('a')):
            if anchor.string is None:
                continue
            self.add_tag(str(anchor.string),
                         URL(anchor.attrs['href'], base_url=url))
def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Add a page for every numeric, class-less link in ``div.pagination``.

    Anchors whose text is not purely digits, or that have no single text
    child at all, are ignored.
    """
    pagination = soup.find('div', {'class': 'pagination'})
    if pagination is None:
        return

    for page in _iter(pagination.find_all('a', {'class': None})):
        caption = page.string
        # .string is None when the anchor is empty or wraps several child
        # nodes; calling isdigit() on it raised AttributeError.
        if caption is not None and caption.isdigit():
            self.add_page(caption, URL(page.attrs['href'], base_url=url))
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for every ``li.thumb-item`` on the page.

    Labels are the last child of the ``fs11 viddata flr`` span (top right),
    the image ``alt`` text (bottom center) and the ``text-active bold`` span
    text (top left).
    """
    for item in _iter(soup.find_all('li', {'class': 'thumb-item'})):
        target = URL(item.a.attrs['href'], base_url=url)
        image = URL(item.img.attrs['src'], base_url=url)
        caption = item.img.attrs.get('alt', '')

        data_tag = item.find('span', {'class': 'fs11 viddata flr'})
        if data_tag is None:
            length = ''
        else:
            length = str(data_tag.contents[-1])

        hd_tag = item.find('span', {'class': 'text-active bold'})
        quality = str(hd_tag.string) if hd_tag is not None else ''

        labels = [
            {'text': length, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
            {'text': quality, 'align': 'top left'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=caption,
                       labels=labels)
def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Add a page for every link inside ``div#center_control``.

    The page label is taken from the link itself: the part of the ``href``
    after the last ``/`` and before the first following ``.``.
    """
    control = soup.find('div', {'id': 'center_control'})
    if control is None:
        return

    for anchor in _iter(control.find_all('a')):
        target = anchor.attrs['href']
        _, _, file_name = target.rpartition('/')
        number, _, _ = file_name.partition('.')
        self.add_page(number, URL(target, base_url=url))
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Resolve the video(s) of a page, either a multi-part list or a single one.

    When ``div#videos_container`` exists, every ``div.pl_vid_el`` in it
    becomes a POST request to ``/php/get_vlink.php`` built from its ``data-*``
    attributes plus the ``usss[0]`` value cut out of an inline script. The
    requests, labelled `` - Part N``, are handed to a new load process whose
    callback is ``self.add_part``, and the result type is set to ``video``.
    Otherwise the ``<video>`` inside the ``itemprop="video"`` block, if any,
    is added as the ``default`` video.
    """
    playlist = soup.find('div', {'id': 'videos_container'})
    single = soup.find('div', {'itemprop': 'video'})

    if playlist:
        script = soup.find('script', text=lambda x: 'usss' in str(x))
        uid = quotes(script.string.replace(' ', ''), 'usss[0]="', '"')

        part_requests = []
        elements = _iter(playlist.find_all('div', {'class': 'pl_vid_el'}))
        for part, element in enumerate(elements, start=1):
            payload = {
                'uid': uid,
                'source': element.attrs['data-source'],
                'hash': element.attrs['data-hash'],
                'x': element.attrs['data-x'],
                'oid': element.attrs['data-oid'],
                'pid': element.attrs['data-pid'],
            }
            request = URL('/php/get_vlink.php', base_url=url, method='POST',
                          post_data=payload)
            part_requests.append(
                YPData(request, ' - Part {0}'.format(part)))

        # The process is created before the result type is set and the list
        # is submitted last, as in the original sequence.
        process = self.model.loader.get_new_load_process(self.add_part)
        self._result_type = 'video'
        process.load_list(part_requests)
    elif single:
        player = single.find('video')
        if player:
            self.add_video('default', URL(player.attrs['src'], base_url=url))
def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Add a tag for every link inside the first ``div.panel``.

    The tag name is the stripped string form of the anchor's first child.
    """
    panel = soup.find('div', {'class': 'panel'})
    if not panel:
        return

    for anchor in _iter(panel.find_all('a')):
        name = str(anchor.contents[0]).strip()
        target = URL(anchor.attrs['href'], base_url=url)
        self.add_tag(name, target)
def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Add a tag for every link inside ``div#categories``.

    The tag name is ``collect_string`` of the anchor and the target is the
    anchor's ``href`` with ``/videos?sort=recent*`` appended. Pages without
    a ``div#categories`` block add no tags.
    """
    categories = soup.find('div', {'id': 'categories'})
    if categories is None:
        # find() returns None when the block is missing; the unguarded
        # find_all() raised AttributeError on such pages.
        return

    for tag in _iter(categories.find_all('a')):
        self.add_tag(
            collect_string(tag),
            URL(tag.attrs['href'] + '/videos?sort=recent*', base_url=url))
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for each ``div.item`` inside the first ``div.list-videos``.

    The image URL comes from ``data-original``. Labels are the last child of
    ``div.duration`` (top right) and the image ``alt`` text (bottom center).
    """
    listing = soup.find('div', {'class': 'list-videos'})
    if listing is None:
        return

    for item in _iter(listing.find_all('div', {'class': 'item'})):
        target = URL(item.a.attrs['href'], base_url=url)
        image = URL(item.img.attrs['data-original'], base_url=url)
        caption = item.img.attrs.get('alt', '')

        length_tag = item.find('div', {'class': 'duration'})
        if length_tag is None:
            length = ''
        else:
            length = str(length_tag.contents[-1])

        labels = [
            {'text': length, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
        ]
        self.add_thumb(thumb_url=image, href=target, popup=caption,
                       labels=labels)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumb for every ``div.post_el`` post that parses cleanly.

    A post with a ``div.combo_post_wrap`` is labelled ``Multi``; otherwise
    the link and image come from ``div.vid_container`` and the top-right
    label is the ``span.duration_small`` text. The author name goes top
    left. Any post that raises AttributeError while being processed is
    skipped silently.
    """
    for post in _iter(soup.find_all('div', {'class': 'post_el'})):
        try:
            author_link = post.find('a', {'class': 'span_author_name'})
            author_name = str(author_link.span.string).strip()

            combo = post.find('div', {'class': 'combo_post_wrap'})
            if combo:
                holder = combo
            else:
                holder = post.find('div', {'class': 'vid_container'})

            target = URL(holder.a.attrs['href'], base_url=url)
            title = holder.a.attrs['title']
            image = URL(holder.img.attrs['src'], base_url=url)

            if combo:
                length = 'Multi'
            else:
                length_tag = post.find('span', {'class': "duration_small"})
                if length_tag is None:
                    length = ''
                else:
                    length = str(length_tag.string)

            labels = [
                {'text': length, 'align': 'top right'},
                {'text': author_name, 'align': 'top left'},
                {'text': title, 'align': 'bottom center'},
            ]
            self.add_thumb(thumb_url=image, href=target, popup=title,
                           labels=labels)
        except AttributeError:
            # Best effort: posts missing an expected element are dropped.
            continue