def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Register one tag per link found inside ``div#categories``.

    Every tag is added through ``self.add_tag`` with the link text and a
    URL made of the link's ``href`` plus ``'/videos?sort=recent*'``.
    Nothing is added when the page has no ``div#categories`` element.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every tag URL.
    """
    categories = soup.find('div', {'id': 'categories'})
    if categories is None:
        # No categories block on this page; calling find_all() on None
        # would raise AttributeError.
        return

    for tag in _iter(categories.find_all('a')):
        self.add_tag(
            collect_string(tag),
            URL(tag.attrs['href'] + '/videos?sort=recent*', base_url=url))
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumbnail for every matching ``div`` that contains a link.

    Scans ``div`` elements whose class is one of ``video``, ``category``,
    ``pornstar`` or ``serie``; blocks without an ``<a>`` are skipped.
    Each thumbnail gets three labels: duration (top right), the image
    ``alt`` text (bottom center) and the ``span.hd`` text (top left).

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every URL built here.
    """
    wanted_classes = ['video', 'category', 'pornstar', 'serie']
    for item in _iter(soup.find_all('div', {'class': wanted_classes})):
        anchor = item.find('a')
        if not anchor:
            continue

        target = URL(anchor.attrs['href'], base_url=url)
        # The image address is read from 'data-original', not from 'src'.
        picture = URL(item.img.attrs['data-original'], base_url=url)
        caption = item.img.attrs.get('alt', '')

        duration_tag = item.find('span', {'class': 'duration'})
        if duration_tag is None:
            duration_text = ''
        else:
            duration_text = collect_string(duration_tag)

        hd_tag = item.find('span', {'class': 'hd'})
        if hd_tag is None:
            hd_text = ''
        else:
            hd_text = str(hd_tag.string).strip()

        overlay = [
            {'text': duration_text, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
            {'text': hd_text, 'align': 'top left'},
        ]
        self.add_thumb(thumb_url=picture,
                       href=target,
                       popup=caption,
                       labels=overlay)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumbnail for each ``div.column`` inside ``main#main``.

    Does nothing when ``main#main`` is absent; columns without an ``<a>``
    are skipped.  The link's ``title`` attribute is used both as popup
    text and as the bottom label.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every URL built here.
    """
    main_area = soup.find('main', {'id': 'main'})
    if not main_area:
        return

    for column in _iter(main_area.find_all('div', {'class': 'column'})):
        anchor = column.find('a')
        if not anchor:
            continue

        target = URL(anchor.attrs['href'], base_url=url)
        title = anchor.attrs['title']
        picture = URL(column.img.attrs['src'], base_url=url)

        length_tag = column.find('span', {'class': "length"})
        if length_tag is None:
            length_text = ''
        else:
            length_text = collect_string(length_tag)

        quality_tag = column.find('span', {'class': "quality"})
        if quality_tag is None:
            quality_text = ''
        else:
            quality_text = str(quality_tag.string)

        overlay = [
            {'text': length_text, 'align': 'top right'},
            {'text': title, 'align': 'bottom center'},
            {'text': quality_text, 'align': 'top left', 'bold': True},
        ]
        self.add_thumb(thumb_url=picture,
                       href=target,
                       popup=title,
                       labels=overlay)
def parse_others(self, soup: BeautifulSoup, url: URL):
    """Add thumbnails from one of two alternative list layouts.

    If ``div.tag-150-container`` exists, its ``<li>`` entries are added
    (label = collected text of the entry) and the method returns.
    Otherwise the ``<li>`` entries of ``ul.pornstar-tag-list`` are added,
    labelled with the image ``alt`` text and the ``div.video-count`` text.
    Entries without an ``<a href=...>`` are skipped in both layouts.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every URL built here.
    """
    tag_box = soup.find('div', {'class': 'tag-150-container'})
    if tag_box:
        for entry in _iter(tag_box.find_all('li')):
            link = entry.find('a', href=True)
            if not link:
                continue
            target = URL(link.attrs['href'], base_url=url)
            image = entry.img
            # Prefer 'data-original'; fall back to 'src' (may be None).
            image_file = image.attrs.get('data-original',
                                         image.attrs.get('src'))
            picture = URL(image_file, base_url=url)
            caption = collect_string(entry)
            self.add_thumb(
                thumb_url=picture,
                href=target,
                popup=caption,
                labels=[{'text': caption, 'align': 'bottom center'}])
        # The first layout was present: the second one is not examined.
        return

    star_list = soup.find('ul', {'class': 'pornstar-tag-list'})
    if not star_list:
        return

    for entry in _iter(star_list.find_all('li')):
        link = entry.find('a', href=True)
        if not link:
            continue
        target = URL(link.attrs['href'], base_url=url)
        image = entry.img
        image_file = image.attrs.get('data-original',
                                     image.attrs.get('src'))
        picture = URL(image_file, base_url=url)
        caption = image.attrs.get('alt', '')

        count_tag = entry.find('div', {'class': 'video-count'})
        if count_tag is None:
            count_text = ''
        else:
            count_text = str(count_tag.string).strip()

        self.add_thumb(
            thumb_url=picture,
            href=target,
            popup=caption,
            labels=[
                {'text': count_text, 'align': 'top right'},
                {'text': caption, 'align': 'bottom center'},
            ])
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumbnail for every linked ``<li>`` of the known list types.

    Scans ``ul`` elements whose class is ``nThumbsList``, ``catsList`` or
    ``channelsList``; list items without an ``<a>`` are skipped.  The
    stripped ``alt`` text of the link's image is used as popup text and
    as the bottom label.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every URL built here.
    :raises KeyError: if an image has neither ``data-original`` nor
        ``src``, or a required ``href``/``alt`` attribute is missing.
    """
    list_classes = ['nThumbsList', 'catsList', 'channelsList']
    for container in _iter(soup.find_all('ul', {'class': list_classes})):
        for thumbnail in _iter(container.find_all('li')):
            xref = thumbnail.find('a')
            if not xref:
                continue

            href = URL(xref.attrs['href'], base_url=url)
            description = xref.img.attrs['alt'].strip()

            # Read 'src' only as a fallback.  Passing attrs['src'] as the
            # default of .get() evaluates it eagerly and raises KeyError
            # for images that carry 'data-original' but no 'src'.
            img_attrs = thumbnail.img.attrs
            if 'data-original' in img_attrs:
                thumb_addr = img_attrs['data-original']
            else:
                thumb_addr = img_attrs['src']
            thumb_url = URL(thumb_addr, base_url=url)

            duration = thumbnail.find(
                'div', {'class': ["videoDuration", 'vidcountSp']})
            dur_time = collect_string(duration) if duration else ''

            quality = thumbnail.find('div', {'class': "hdIcon"})
            qual = collect_string(quality) if quality else ''

            self.add_thumb(
                thumb_url=thumb_url,
                href=href,
                popup=description,
                labels=[
                    {'text': dur_time, 'align': 'top right'},
                    {'text': description, 'align': 'bottom center'},
                    {'text': qual, 'align': 'top left', 'bold': True},
                ])
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumbnail for every ``div.video-item`` on the page.

    The first link of the item supplies the target, and the ``alt`` text
    of that link's image is used as popup text and bottom label.  The
    ``span.duration`` text, when present, becomes the top-right label.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every URL built here.
    """
    for item in _iter(soup.find_all('div', {'class': ['video-item']})):
        anchor = item.a
        target = URL(anchor.attrs['href'], base_url=url)
        caption = anchor.img.attrs['alt']
        picture = URL(item.img.attrs['src'], base_url=url)

        duration_tag = item.find('span', {'class': "duration"})
        if duration_tag is None:
            duration_text = ''
        else:
            duration_text = collect_string(duration_tag)

        overlay = [
            {'text': duration_text, 'align': 'top right'},
            {'text': caption, 'align': 'bottom center'},
        ]
        self.add_thumb(thumb_url=picture,
                       href=target,
                       popup=caption,
                       labels=overlay)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Register tags for the links inside ``div.video-info``.

    Links whose ``href`` contains ``'/user/'`` are added in blue, named
    by ``quotes(href, '/user/', '/')`` and pointed at the same address
    with ``'/user/'`` replaced by ``'/submitted/'``.  Every other link is
    added under its collected text.  Anchors without an ``href`` and
    anchors whose ``href`` contains ``'javascript'`` are ignored.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every tag URL.
    """
    info = soup.find('div', {'class': 'video-info'})
    if info is None:
        return

    def is_real_link(value):
        # BeautifulSoup calls an attribute filter with None for tags that
        # lack the attribute; such anchors must be rejected here because
        # attrs['href'] is read below.
        return value is not None and 'javascript' not in str(value)

    for xref in _iter(info.find_all('a', href=is_real_link)):
        # NOTE(review): psp() looks like leftover debug output - confirm
        # before removing.
        psp(xref)
        href = xref.attrs['href']
        if '/user/' in href:
            self.add_tag(
                quotes(href, '/user/', '/'),
                URL(href.replace('/user/', '/submitted/'), base_url=url),
                style={'color': 'blue'})
        else:
            self.add_tag(collect_string(xref), URL(href, base_url=url))
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Register a tag for every link inside each ``div.content-container``.

    Links containing ``'/users/'`` are coloured blue and redirected to the
    address with ``'/videos/public/'`` appended; links containing
    ``'/pornstar/'`` are coloured red.  Other links get no colour.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` when the link URL is built.
    """
    boxes = soup.find_all('div', {'class': 'content-container'})
    for box in _iter(boxes):
        for anchor in _iter(box.find_all('a', href=True)):
            psp(anchor.prettify())
            caption = collect_string(anchor)
            target = URL(anchor.attrs['href'], base_url=url)
            print(caption, target)

            tint = None
            if target.contain('/users/'):
                tint = 'blue'
                # Rebuilt without base_url, from the already built address.
                target = URL(target.get() + '/videos/public/')
            # Checked against the possibly rewritten target; red wins.
            if target.contain('/pornstar/'):
                tint = 'red'

            self.add_tag(caption, target, style={'color': tint})
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Register tags from the section title and from the video tag list.

    Links in ``div.section-title`` are named by ``str(link.string)``;
    links in ``ul.video-tag-list`` are named by their collected text.
    In both cases the colour is red for addresses containing
    ``'/pornstars/'``, blue for ``'/channel/'``, otherwise ``None``.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every tag URL.
    """

    def pick_color(address):
        # '/pornstars/' takes precedence when both markers are present.
        if '/pornstars/' in address:
            return 'red'
        if '/channel/' in address:
            return 'blue'
        return None

    title_box = soup.find('div', {'class': 'section-title'})
    if title_box:
        for anchor in _iter(title_box.find_all('a', href=True)):
            address = anchor.attrs['href']
            self.add_tag(str(anchor.string),
                         URL(address, base_url=url),
                         style={'color': pick_color(address)})

    tag_list = soup.find('ul', {'class': 'video-tag-list'})
    if tag_list:
        for anchor in _iter(tag_list.find_all('a', href=True)):
            address = anchor.attrs['href']
            self.add_tag(collect_string(anchor),
                         URL(address, base_url=url),
                         style={'color': pick_color(address)})
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add a thumbnail for every ``div.video`` inside ``div.thumblist``.

    Does nothing when ``div.thumblist`` is absent; video blocks without
    an ``<a href=...>`` are skipped.  Labels: ``span.time`` text (top
    right), image ``alt`` text (bottom center) and ``'HD'`` (top left)
    when ``span.time`` contains an image with ``alt="HD Video"``.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every URL built here.
    """
    # The attrs filter must be a dict; a set literal {'class', 'thumblist'}
    # is not a class=thumblist filter.
    contents = soup.find('div', {'class': 'thumblist'})
    if contents is None:
        return

    for thumbnail in _iter(contents.find_all('div', {'class': 'video'})):
        xref = thumbnail.find('a', href=True)
        if xref is None:
            continue

        href = URL(xref.attrs['href'], base_url=url)
        thumb_url = URL(thumbnail.img.attrs['src'], base_url=url)
        label = thumbnail.img.attrs.get('alt', '')

        duration = thumbnail.find('span', {'class': 'time'})
        if duration is None:
            # Without the time span there is neither a duration nor a
            # place to look for the HD marker.
            dur_time = ''
            hd = ''
        else:
            dur_time = collect_string(duration)
            hd_img = duration.find('img', {'alt': 'HD Video'})
            hd = 'HD' if hd_img else ''

        self.add_thumb(
            thumb_url=thumb_url,
            href=href,
            popup=label,
            labels=[
                {'text': dur_time, 'align': 'top right'},
                {'text': label, 'align': 'bottom center'},
                {'text': hd, 'align': 'top left'},
            ])
def parse_pictures_tags(self, soup: BeautifulSoup, url: URL):
    """Register a tag for every captioned link in the picture tag containers.

    Containers come from ``self.get_picture_tag_containers(soup)``.  The
    caption is the link's collected text with leading and trailing
    spaces and ``|`` characters removed; links whose caption ends up
    empty are ignored.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every tag URL.
    """
    for box in _iter(self.get_picture_tag_containers(soup)):
        anchors = box.find_all('a', href=True)
        for anchor in _iter(anchors):
            caption = collect_string(anchor).strip(' |')
            if not caption:
                continue
            target = URL(anchor.attrs['href'], base_url=url)
            self.add_tag(caption, target)
def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Register one tag per link in the categories sub-navigation box.

    Looks for ``div#categories-subnav-box`` inside ``div.menu-box`` and
    adds every ``<a>`` found there through ``self.add_tag``.  Nothing is
    added when either element is missing.

    :param soup: parsed page to scan.
    :param url: page URL; not used by this parser.
    """
    menu = soup.find('div', {'class': 'menu-box'})
    if menu is None:
        # No menu on this page; menu.find() would raise AttributeError.
        return

    categories = menu.find('div', {'id': 'categories-subnav-box'})
    if categories is None:
        return

    for tag in _iter(categories.find_all('a')):
        # NOTE(review): the URL is built without base_url, so the hrefs
        # are presumably absolute on this site - confirm.
        self.add_tag(collect_string(tag), URL(tag.attrs['href']))
def parse_thumbs_tags(self, soup: BeautifulSoup, url: URL):
    """Register a tag for every category link in ``div.sidebar``.

    Only anchors whose ``href`` contains ``'/categories/'`` are used.
    Trailing digits and dots are removed from the collected link text
    before it is used as the tag name.  Nothing is added when the page
    has no ``div.sidebar``.

    :param soup: parsed page to scan.
    :param url: page URL, passed as ``base_url`` for every tag URL.
    """
    tags_container = soup.find('div', {'class': 'sidebar'})
    if tags_container is None:
        return

    def is_category_link(value):
        # BeautifulSoup calls an attribute filter with None for anchors
        # that have no href; "'/categories/' in None" raises TypeError.
        return value is not None and '/categories/' in value

    category_links = tags_container.find_all('a', {'href': is_category_link})
    for tag in _iter(category_links):
        # Presumably the stripped suffix is a per-category counter -
        # TODO confirm against the site markup.
        name = collect_string(tag).rstrip('.0123456789')
        self.add_tag(name, URL(tag.attrs['href'], base_url=url))