def load_to_file(self, file: FLData) -> FLData:
    """Fetch ``file.url`` and store the result as *file* requests.

    Three modes, checked in this order:

    * ``file.find_redirect_location`` is truthy: only the redirect target
      is resolved with ``self.get_redirect_location`` and stored in
      ``file.redirect_location``; nothing is downloaded.
    * ``file.filename`` is empty or ``None``: the body is downloaded with
      ``self.open`` and stored, decoded, in ``file.text`` (undecodable
      bytes are dropped).
    * otherwise: the body is written to ``file.filename``. The download is
      skipped when the file already exists and ``file.overwrite`` is
      falsy. Missing parent directories are created.

    Returns:
        The same *file* object that was passed in.
    """
    if file.find_redirect_location:
        file.redirect_location = self.get_redirect_location(file.url)
        return file

    # Test for "no filename" by value and before touching the filesystem:
    # os.path.exists(None) raises TypeError, and ``is ''`` compares identity.
    if not file.filename:
        file.text = self.open(file.url).decode(errors='ignore')
        return file

    if not file.overwrite and os.path.exists(file.filename):
        return file

    result = self.open(file.url)

    # A bare file name has an empty dirname; os.makedirs('') would raise.
    folder = os.path.dirname(file.filename)
    if folder:
        os.makedirs(folder, exist_ok=True)

    try:
        with open(file.filename, 'wb') as fd:
            fd.write(result)
    except FileNotFoundError as err:
        # Best effort, as before: report the problem and carry on.
        print(err)
    return file
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Find the video inside ``<div id="video">``.

    A ``<video src=...>`` tag is registered directly as the 'default'
    video. Otherwise, if an ``<iframe src=...>`` is present, the part of
    its ``src`` after 'embed.php?f=' is POSTed to the site's Loader.php and
    parsing continues in ``self.continue_parse_video``.
    """
    holder = soup.find('div', id='video')
    if not holder:
        return

    direct = holder.find('video', src=True)
    if direct:
        self.add_video('default', URL(str(direct.attrs['src'])))
        return

    embed = holder.find('iframe', src=True)
    if not embed:
        return

    embed_src = str(embed.attrs['src'])
    # rpartition yields the whole string when the marker is absent.
    _, _, code = embed_src.rpartition('embed.php?f=')
    request_url = URL(
        'http://donfreeporn.com/wp-content/themes/detube-noedit/Htplugins/Loader.php',
        method='POST',
        post_data={'data': code})
    request = FLData(request_url, '')
    self._result_type = 'video'
    self.model.loader.start_load_file(request, self.continue_parse_video)
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Extract video sources from the jwplayer script in ``<div id="player">``.

    Two script layouts are handled:

    * ``sources:[...]`` inline: the bracketed list is parsed as JSON and
      every entry with a non-empty ``file`` is added as a video labelled
      with its ``label``; the last one is then made the default.
    * ``sources:`` without an inline list: the external '/player/' script
      referenced from ``<div class="content-video">`` is requested and
      parsing continues in ``self.continue_parse_video``.
    """
    player_container = soup.find('div', {'id': 'player'})
    if not player_container:
        return

    script = player_container.find(
        'script', text=lambda text: 'jwplayer' in str(text))
    if not script:
        return

    text = script.string
    if 'sources:[' in text:
        sources = quotes(text, 'sources:[', ']')
        for j_data in json.loads('[' + sources + ']'):
            # Compare by value: ``is not ''`` tests object identity, which
            # is implementation-dependent for strings.
            if j_data['file'] != '':
                # NOTE(review): the '*' suffix is handed to URL unchanged;
                # presumably a project-specific marker -- confirm in URL.
                self.add_video(j_data['label'], URL(j_data['file'] + '*'))
        self.set_default_video(-1)
    elif 'sources:' in text:
        container = soup.find('div', {'class': 'content-video'})
        if not container:
            return
        player_script = container.find(
            'script', {'src': lambda x: '/player/' in str(x)})
        if not player_script:
            return
        script_url = URL(player_script.attrs['src'], base_url=url)
        filedata = FLData(script_url, '')
        self._result_type = 'video'
        self.model.loader.start_load_file(filedata,
                                          self.continue_parse_video)
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Request the file named by ``host:'...'`` in the angular script.

    The first ``<script>`` whose text contains 'angular.' is taken, spaces
    are stripped from it, and the value between ``host:'`` and the next
    ``'`` is resolved against *url*. The result is loaded and handed to
    ``self.continue_parse_video``.
    """
    angular_script = soup.find('script',
                               text=lambda x: 'angular.' in str(x))
    if not angular_script:
        return

    compact = angular_script.string.replace(' ', '')
    host = quotes(compact, "host:'", "'")
    request = FLData(URL(host, base_url=url), '')
    self._result_type = 'video'
    self.model.loader.start_load_file(request, self.continue_parse_video)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Parse thumbnails directly, or defer to the XHR URL attached to *url*.

    Without ``url.any_data`` the page is parsed by ``self._parse_thumbs``.
    With it, the URL stored under ``any_data['xhr']`` is tagged with the
    current *url* as ``base``, requested, and handled by
    ``self.parse_thumbs_xhr``.
    """
    if url.any_data:
        self.waiting_data = True
        self._result_type = 'thumbs'
        xhr_url = url.any_data['xhr']
        # Lets the XHR handler find the page this request came from.
        xhr_url.any_data = {'base': url}
        request = FLData(xhr_url, '')
        self.model.loader.start_load_file(request, self.parse_thumbs_xhr)
        return

    self._parse_thumbs(soup, url)
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Follow the iframe inside ``<div class="videoContainer">``.

    The iframe ``src`` is resolved against *url* (which is also sent as
    referer), loaded, and handed to ``self.continue_parse_video``.
    """
    holder = soup.find('div', {'class': 'videoContainer'})
    if not holder:
        return

    iframe_src = holder.iframe.attrs['src']
    request = FLData(URL(iframe_src, base_url=url, referer=url), '')
    self._result_type = 'video'
    self.model.loader.start_load_file(request, self.continue_parse_video)
def goto_url(self, url: URL, **options):
    """Log the navigation, remember *url* and *options*, and start loading.

    The loaded page is delivered to ``self.on_load_url``. *options* are
    kept in ``self.start_options`` for later steps.
    """
    message = ['Goto url:', url]
    if url.any_data:
        message += ['/', url.any_data]
    self.log(*message)

    self.url = url
    self.start_options = options

    page_loader = self.model.loader
    request = FLData(url, '')
    page_loader.start_load_file(request, self.on_load_url)
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Resolve the ``video_url`` found in the player-holder script.

    The value between ``video_url:'`` and the next ``'`` (after removing
    spaces from the script) gets a '*' appended, is resolved against
    *url*, and is requested with ``find_redirect_location=True``. The
    result goes to ``self.continue_parse_video``.
    """
    holder = soup.find('div', {'class': 'player-holder'})
    if holder is None:
        return

    player_script = holder.find('script',
                                text=lambda x: 'video_url:' in str(x))
    if player_script is None:
        return

    compact = str(player_script.string).replace(' ', '')
    video_path = quotes(compact, "video_url:'", "'")
    target = URL(video_path + '*', base_url=url)
    request = FLData(target, '', find_redirect_location=True)
    self._result_type = 'video'
    self.model.loader.start_load_file(request, self.continue_parse_video)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add thumbnails from the page, or defer to a JSON file.

    When ``url.any_data`` is set, the URL under
    ``any_data['json_file_url']`` is tagged with the current *url* as
    ``first_page_url``, requested, and handled by
    ``self.parse_thumbs_json``. Otherwise every ``video-box-wrapper``
    inside ``<ul class="videoList">`` that contains a link becomes one
    thumbnail.
    """
    if url.any_data:
        self.waiting_data = True
        self._result_type = 'thumbs'
        json_url = url.any_data['json_file_url']
        json_url.any_data = dict(first_page_url=url)
        request = FLData(json_url, '')
        self.model.loader.start_load_file(request, self.parse_thumbs_json)
        return

    video_list = soup.find('ul', {'class': 'videoList'})
    if not video_list:
        return

    boxes = video_list.find_all('div', {'class': 'video-box-wrapper'})
    for box in _iter(boxes):
        link = box.find('a')
        if not link:
            continue

        target = URL(link.attrs['href'], base_url=url)
        preview = URL(link.img.attrs['data-srcmedium'], base_url=url)
        title = box.img.attrs.get('alt', '')
        length = link.attrs['data-duration']
        # The top-left (HD) label is always empty for this site.
        labels = [
            {'text': length, 'align': 'top right'},
            {'text': title, 'align': 'bottom center'},
            {'text': '', 'align': 'top left'},
        ]
        self.add_thumb(thumb_url=preview,
                       href=target,
                       popup=title,
                       labels=labels)
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Add the owner tag and request the video's details page.

    The owner link inside ``<div class="infoBlock">``, if present, is
    added as a blue tag. Then '/details/<data-slug>' (slug taken from the
    ``ibTrigger`` div) is requested and handled by
    ``self.continue_parse_video_tags``.
    """
    info_block = soup.find('div', {'class': 'infoBlock'})

    owner_link = info_block.find('a', {'class': 'owner'})
    if owner_link:
        owner_url = URL(owner_link.attrs['href'], base_url=url)
        self.add_tag(str(owner_link.string),
                     owner_url,
                     style=dict(color='blue'))

    trigger = info_block.find('div', {'class': 'ibTrigger'})
    details_url = URL('/details/' + trigger.attrs['data-slug'],
                      base_url=url)
    self.waiting_data = True
    request = FLData(details_url, '')
    self.model.loader.start_load_file(request,
                                      self.continue_parse_video_tags)
def generate_pictures_view(self):
    """Open a full view for ``self.pictures`` and start downloading them.

    Does nothing while ``self.waiting_data`` is set. The view passed in
    ``start_options['current_full_view']`` is reused when given; otherwise
    a new one is created and subscribed to the full-view history. Each
    picture is saved under ``Setting.pictures_path`` and added to the view
    as it finishes loading.
    """
    if self.waiting_data:
        return

    if Setting.debug_site:
        print('Parsing time {0:.2f} s'.format(time() - self.start_time))

    view = self.start_options.get('current_full_view', None)
    if not view:
        view = self.model.view_manager.new_full_view()
        view.subscribe_to_history_event(self.model.full_history.add)

    flags = self.start_options.get('flags')
    loader = self.model.loader.get_new_load_process(
        on_load_handler=lambda fl_data: view.add_picture(fl_data.filename))

    view.prepare(url=self.url,
                 title=self.title,
                 tooltip=self.url.get(),
                 on_stop=loader.abort,
                 flags=flags,
                 max_progress=len(self.pictures))

    # Existing files are kept (overwrite=False), so re-opening is cheap.
    downloads = [
        FLData(picture['url'],
               Setting.pictures_path + picture['file'].strip('/'),
               overwrite=False) for picture in self.pictures
    ]
    loader.load_list(downloads)

    self.add_controls_to_view(view)

    if Setting.debug_site:
        print()
    self.log('View', len(self.pictures), 'pictures on', self.url)
    # TODO: add debug output
def continue_parse_video(self, fldata: FLData):
    """Request the playlist named in the loaded ``jwplayer().load('...')``.

    The text between ``jwplayer().load('`` and the next ``'`` in
    ``fldata.text`` gets a '*' appended and is loaded; the result is passed
    to ``self.continue_parse_video2``.
    """
    playlist_path = quotes(fldata.text, "jwplayer().load('", "'")
    request = FLData(URL(playlist_path + '*'), '')
    self.model.loader.start_load_file(request, self.continue_parse_video2)
def parse_thumbs(self, soup: BeautifulSoup, url: URL):
    """Add thumbnails from a video, channel or star listing page.

    Only the first matching layout is processed, in this order:

    * video lists (``ul.thumb-list`` / ``ul.video-listing``): one thumb per
      ``<li>`` with duration and HD labels; items missing a required
      attribute are skipped.
    * channel lists (``ul.channels-list``): one thumb per ``<li>`` with
      the text of the span mentioning 'videos' as label.
    * star lists (``ul.pornStarsThumbs``): the site's front page is
      requested first and ``self.continue_with_stars`` runs on its result;
      meanwhile the links in ``ul.abc-categories`` are added as tags.
    """
    thumbnail_containers = soup.find_all(
        'ul', {'class': ['thumb-list', 'video-listing']})
    channel_containers = soup.find_all('ul', {'class': ['channels-list']})
    stars_containers = soup.find_all('ul', {'class': ['pornStarsThumbs']})

    if thumbnail_containers:
        # parse thumbnail page
        for thumbnail_container in thumbnail_containers:
            for thumbnail in _iter(thumbnail_container.find_all('li')):
                try:
                    href = URL(thumbnail.a.attrs['href'], base_url=url)
                    thumb_url = URL(thumbnail.img.attrs['data-src'],
                                    base_url=url)
                    label = thumbnail.img.attrs.get('alt', '')

                    duration = thumbnail.find('span', {
                        'class': ['widget-video-duration', 'video-duration']
                    })
                    dur_time = '' if duration is None else str(
                        duration.string).strip()

                    hd_span = thumbnail.find(
                        'span', {'class': ['hd-video-icon', 'hd-video']})
                    hd = '' if hd_span is None else ' HD'

                    self.add_thumb(thumb_url=thumb_url,
                                   href=href,
                                   popup=label,
                                   labels=[{
                                       'text': dur_time,
                                       'align': 'top right'
                                   }, {
                                       'text': label,
                                       'align': 'bottom center'
                                   }, {
                                       'text': hd,
                                       'align': 'top left'
                                   }])
                except KeyError:
                    # Deliberate best effort: list items without the
                    # expected attributes are skipped.
                    continue

    elif channel_containers:
        # parse channels page
        for channel_container in channel_containers:
            for channel in _iter(channel_container.find_all('li')):
                href = URL(channel.a.attrs['href'], base_url=url)

                # Prefer the image inside the channel logo; fall back to
                # any image in the item, also when the logo span is absent.
                logo = channel.find('span', {'class': 'channel-logo'})
                img = logo.find('img') if logo is not None else None
                if img is None:
                    img = channel.find('img')

                # Look up 'src' only when 'data-src' is missing; passing
                # it as the .get() default would evaluate it eagerly and
                # raise KeyError for images that only carry 'data-src'.
                if 'data-src' in img.attrs:
                    img_src = img.attrs['data-src']
                else:
                    img_src = img.attrs['src']
                thumb_url = URL(img_src, base_url=url)

                label = channel.img.attrs.get('alt', '')
                num_videos_span = channel.find(
                    'span', text=lambda x: 'videos' in str(x))
                num_videos = '' if num_videos_span is None else str(
                    num_videos_span.string).strip()

                self.add_thumb(thumb_url=thumb_url,
                               href=href,
                               popup=label,
                               labels=[{
                                   'text': num_videos,
                                   'align': 'top right'
                               }, {
                                   'text': label,
                                   'align': 'bottom center'
                               }])

    elif stars_containers:
        # unlock http://www.pornhub.com/
        self.model.loader.start_load_file(
            FLData(URL('http://pornhub.com/', test_string='Tube'), ''),
            on_result=lambda x: self.continue_with_stars(
                FLData(url, ''), soup))
        self.waiting_data = True

        # adding tags to stars page
        tags_containers = _iter(
            soup.find_all('ul', {'class': ['abc-categories']}))
        for tags_container in tags_containers:
            for tag in _iter(tags_container.find_all('a')):
                self.add_tag(str(tag.string),
                             URL(tag.attrs['href'], base_url=url))