def get_title(self):
    if self.title is not None:
        return self.title
    site = format_site_from_url(self.url)
    try:
        result = []

        def process_website(result):
            # Fetch the page in a worker thread so a hung request cannot block forever.
            browser = Browser()
            browser.set_handle_robots(False)
            browser.open(self.url, timeout=9.00)
            result.append(browser)

        thread = threading.Thread(target=process_website, args=(result,))
        thread.start()
        thread.join(timeout=10)
        if len(result) == 0:
            raise Exception("browser timed out or failed")
        browser = result[0]
        self.title = "[%s] %s" % (site.encode("utf-8"), encoding_sucks(clean_title(browser.title())).lower().capitalize())
        self.langue = get_langue_from_html(browser.response().get_data())
        self.save()
        return self.title
    except Exception as e:
        print("Error: fail on %s: %s" % (self.url, e))
        self.title = "[%s] Error: couldn't fetch the title" % site
        self.save()
        return self.title

def read(self):
    soup = downloader.read_soup(self.url)
    for div in soup.findAll('div', class_='fileText'):
        href = urljoin(self.url, div.a['href'])
        img = Image(href, self.url, len(self.urls))
        self.urls.append(img.url)
    board = self.url.split('/')[3]
    title = soup.find('span', class_='subject').text
    id_ = int(self.url.split('/thread/')[1].split('/')[0])
    self.title = clean_title(f'[{board}] {title} ({id_})')

def read(self):
    ui_setting = self.ui_setting
    cw = self.customWidget
    print_ = get_print(cw)
    if self.yt_type == 'video':
        res = get_resolution()
        info = get_videos(self.url, type=self.yt_type, max_res=res, only_mp4=False, audio_included=False, cw=cw)
    else:
        abr = get_abr()
        info = get_videos(self.url, type=self.yt_type, max_abr=abr, cw=cw)
    videos = info['videos']

    cw.enableSegment(overwrite=True)

    # first video must be valid
    while videos:
        video = videos[0]
        try:
            video.url()
            break
        except Exception as e:
            print(e)
            videos.remove(video)
    else:
        raise Exception('No videos')

    if len(videos) > 1:
        p2f = get_p2f(cw)
        if p2f:
            self.single = False
            self.title = clean_title(info['title'])
            self.urls = [video.url for video in videos]
            video = videos[0]
            self.setIcon(video.thumb)
            return
        else:
            video = videos.pop(0)
            cw.gal_num = cw.url = video.url._url
            if videos and cw.alive:
                s = u', '.join(video.url._url for video in videos)
                self.exec_queue.put(([s, {'youtube': cw.format}], 'downButton(cw[0], format_selector=cw[1])'))

    self.urls.append(video.url)
    self.artist = video.username
    self.setIcon(video.thumb)
    self.title = video.title

def read(self):
    if '/post/' in self.url:
        # "Individual downloads are not supported: {}"
        raise errors.Invalid(tr_('개별 다운로드는 지원하지 않습니다: {}').format(self.url))
    self._popular = 'search-Popular.' in self.url
    self.title = clean_title(self.name)
    qs = query_url(self.url)
    q = qs['q'][0]
    for id in get_ids_multi(q, self._popular, self.cw):
        img = Image(id, self.url)
        self.urls.append(img.url)

def read(self):
    ## loop = asyncio.new_event_loop()
    ## asyncio.set_event_loop(loop)
    try:
        info = get_info(self.url, self.cw)
        for img in info['imgs']:
            self.urls.append(img.url)
        self.title = clean_title(info['title'])
    finally:
        ## loop.close()
        pass

def get_title(soup, cw=None):
    print_ = get_print(cw)
    for h1 in soup.findAll('h1'):
        title = h1.text.strip()
        if title:
            break
    else:
        raise Exception('no title')
    title_clean = clean_title(title)
    print_('get_title: "{}"({}) "{}"({})'.format(title, title.encode('utf8'), title_clean, title_clean.encode('utf8')))
    return title_clean

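# Illustrative usage sketch (not from the original source): get_title() above only needs
# an object exposing findAll('h1'), so a Soup built from a hand-written HTML string is
# enough to exercise it. Soup and clean_title are assumed to be the same helpers the
# surrounding snippets import; the markup below is invented.
def _example_get_title():
    soup = Soup('<html><h1> </h1><h1>Example thread</h1></html>')
    # The empty first <h1> is skipped; the second one is logged and returned cleaned.
    return get_title(soup)  # -> clean_title('Example thread')
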
def __init__(self, type, url, title, referer, p=0):
    self.type = type
    self.url = LazyUrl(referer, lambda _: url, self)
    ext = os.path.splitext(url.split('?')[0])[1]
    if ext.lower() == '.php':
        ext = '.mp4'
    if type == 'video':
        self.filename = clean_title('{}{}'.format(title, ext))
    else:
        self.filename = '{}{}'.format(p, ext)
    self.title = title

def read(self):
    title = clean_title(self.soup.find('h1').text.strip())
    self.title = tr_(u'읽는 중... {}').format(title)  # "Reading... {}"
    imgs = get_imgs_all(self.url, title, cw=self.cw)
    for img in imgs:
        if isinstance(img, Image):
            self.urls.append(img.url)
        else:
            self.urls.append(img)
    self.title = title

def get_video(url, soup=None):
    if soup is None:
        html = downloader.read_html(url)
        soup = Soup(html)
    video = soup.find('video', id='vjsplayer').find('source').attrs['src']
    url_thumb = soup.find('video', id='vjsplayer').attrs['poster']
    title = get_title(soup)
    filename = u'{}.mp4'.format(clean_title(title))
    video = Video(video, url_thumb, url, filename)
    return video

def read(self):
    # Not sure yet how the value will be passed in, so it is simply split for now.
    # Accepted forms: discord_email/password/guild_id or discord_token/guild_id.
    token_guild_id_list = self.url.split("/")
    if len(token_guild_id_list) == 2:
        token = token_guild_id_list[0]
        guild_id = token_guild_id_list[1]
    elif len(token_guild_id_list) == 3:
        email = token_guild_id_list[0]
        password = token_guild_id_list[1]
        guild_id = token_guild_id_list[2]
        response = self.post_account_info(email, password)
        account_info = response.json()
        if response.status_code == 400:
            if account_info.get("captcha_key"):
                # Returning a message box froze the UI, so raise is used instead.
                raise errors.Invalid("Please log in via the web or the Discord app first and complete the captcha.")
            else:
                raise errors.Invalid("The email or password is incorrect. Please check it and try again.")
        else:
            if not account_info["token"]:
                raise errors.Invalid("Could not obtain a token. If you use two-factor authentication, please request with a token instead.")
            else:
                token = account_info["token"]
    else:
        raise errors.Invalid("Too many arguments were given.")
    # GET request with the token.
    guild_info_response = self.get_emoji_list(token, int(guild_id))
    if guild_info_response.status_code != 200:
        raise errors.Invalid("The token is invalid or the server could not be found. Check that the token is correct and that you have joined that server.")
    else:
        guild_info = guild_info_response.json()
        if guild_info["emojis"]:
            base_url = "https://cdn.discordapp.com/emojis/"
            for emoji in guild_info["emojis"]:  # iterate over the emoji list
                if emoji["animated"] is True:  # animated emoji are downloaded as gif
                    param = emoji["id"] + ".gif"
                else:  # otherwise as png
                    param = emoji["id"] + ".png"
                # Folder name is the guild name and id.
                self.title = clean_title(f'{guild_info["name"]}({guild_info["id"]})')
                self.urls.append(base_url + param + "?v=1")  # build the final URL
        else:
            raise errors.Invalid("That server has no emoji.")

def id(self):
    if self.type_sankaku == 'www':
        id = u'[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        qs = query_url(self.url)
        tags = qs.get('tags', [])
        tags.sort()
        id = u' '.join(tags)
        if not id:
            id = u'N/A'
        id = '[{}] '.format(self.type_sankaku) + id
    return clean_title(id)

def main():
    # path = "./asset/Donald-Trump-vs-Barack-Obama-on-Nuclear-Weapons-in-East-Asia.txt"
    path = "./asset/People-Arent-Upgrading-Smartphones-as-Quickly-and-That-Is-Bad-for-Apple.txt"
    # path = "./asset/The-Last-Man-on-the-Moon--Eugene-Cernan-gives-a-compelling-account.txt"
    path_synsets = "./asset/synsets.txt"
    path_nasari = "./asset/dd-nasari.txt"

    # Read the synset file produced by the titleSynset.py script.
    synsets = utils.read_file_synset(path_synsets)
    # Dictionary of synsets with the word as key and the BabelNet synset id as value.
    word_to_synset = utils.word_to_synset_dict(synsets)
    # Read the NASARI file.
    nasari = utils.read_file_nasari(path_nasari)
    # Read the file to summarize.
    text = utils.read_file(path)
    # Identify 10 keywords in the file.
    keywords = utils.get_key_words(text)
    # print(keywords)
    # Split the text into title and paragraphs.
    dictionary = utils.paragraph(text)
    # Clean the title, merging proper nouns into single tokens and removing stop words.
    dictionary = utils.clean_title(dictionary)
    # print(dictionary)
    # Determine the context.
    context = get_context(dictionary["Titolo"], word_to_synset, nasari)
    # print(context)
    # context = []
    # Determine the importance/rank of the paragraphs.
    rank_p = rank_paragraphs(dictionary, context, keywords)
    rank_p2 = copy.deepcopy(rank_p)
    print("\n\n\nORIGINAL\n\n\n" + utils.generate_summary(rank_p))
    # Build the summary with the trivial method.
    summary = summarize_trivial(rank_p2, ratio=0.3)  # The ratio can be changed to adjust the summary length.
    print("\n\n\nSUMMARY TRIVIAL\n\n\n" + utils.generate_summary(summary))
    # Build the summary with the efficient method.
    summary = summarize(rank_p, ratio=0.3)  # The ratio can be changed to adjust the summary length.
    print("\n\n\nSUMMARY\n\n\n" + utils.generate_summary(summary))
    # Save the summary.
    utils.save_summary(summary)

def read(self):
    imgs = get_imgs(self.url, self.info, self.cw)

    for img in imgs:
        ext = os.path.splitext(img.split('?')[0])[1]
        if len(imgs) > 1:
            self.filenames[img] = u'{:04}{}'.format(len(self.urls), ext)
        else:
            self.filenames[img] = clean_title(self.name, n=-len(ext)) + ext
        self.urls.append(img)

    self.single = len(imgs) == 1
    self.referer = self.url
    self.title = u'{} (imgur_{})'.format(self.name, self.id_)

def name(self):
    id = self.__info.id
    title = self.__info.title
    artist = self.__info.artist
    title = self.format_title('N/A', id, title, artist, 'N/A', 'N/A', 'Korean', prefix='navertoon_')
    return clean_title(title)

def name(self):
    title = self._info['title']
    artists = self._info['artists']
    artist = artists[0] if artists else 'N/A'
    title = self.format_title('N/A', ''.join(get_id(self.url)), title, artist, 'N/A', 'N/A', 'Korean', prefix='daumtoon_')
    return clean_title(title)

def get(self, referer):
    ext = get_ext(self._url)
    # Substitute in two passes: first turn the format keywords into unambiguous markers,
    # then fill the markers, so inserted values are not mangled by later replacements.
    name = self.format_.replace('id', '###id*').replace('page', '###page*').replace('artist', '###artist*').replace('title', '###title*')
    name = name.replace('###id*', str(self.id_)).replace('###page*', str(self.p)).replace('###artist*', self.artist).replace('###title*', self.title)
    self.filename = clean_title(name.strip(), allow_dot=True, n=-len(ext)) + ext
    return self._url

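# Illustrative sketch (not from the original source) of why get() above substitutes its
# placeholders in two passes: an inserted value may itself contain the literal words
# 'id', 'page', 'artist' or 'title', and a single chain of str.replace() calls would
# mangle it (e.g. 'untitled' becomes 'unmy titled' once 'title' is replaced). The format
# string and values below are invented.
def _example_two_pass_format():
    format_ = 'artist - title (page)'
    values = {'id': '42', 'page': '3', 'artist': 'untitled artist', 'title': 'my title'}
    # First pass: turn the keywords into unambiguous markers.
    name = format_
    for key in values:
        name = name.replace(key, '###{}*'.format(key))
    # Second pass: fill the markers with the real values.
    for key, value in values.items():
        name = name.replace('###{}*'.format(key), value)
    return name  # -> 'untitled artist - my title (3)'
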
def name(self):
    global pss
    if self._name is None:
        url = self.url
        flickr_auth.get_api(url, self.cw)
        if '/albums/' in url:
            user, ps = find_ps(url)
            self._name = u'{} (flickr_album_{}_{})'.format(ps.title, user.id, ps.id)
        else:
            user = flickr_api.Person.findByUrl(url)
            self._name = u'{} (flickr_{})'.format(user.username, user.id)
    return clean_title(self._name)

def get(self, url):
    print_ = get_print(self.cw)
    if self._url:
        return self._url
    info = self.info
    ## ydl = ytdl.YoutubeDL()
    ## info = ydl.extract_info(url)
    formats = info['formats']
    print(formats)
    formats = sorted(formats, key=lambda x: int(x.get('abr', 0)), reverse=True)
    url_audio = None
    for format in formats:
        protocol = format['protocol']
        print_(u'【{}】 format【{}】 abr【{}】'.format(protocol, format['format'], format.get('abr', 0)))
        if not url_audio and protocol in ['http', 'https']:
            url_audio = format['url']

    if not url_audio:
        url_audio = M3u8_stream(formats[0]['url'])

    self.album_art = False
    # self.username = info['uploader']
    self.title = u'{} - {}'.format(self.username, info['title'])
    self.filename = u'{}{}'.format(clean_title(self.title, allow_dot=True, n=-4), '.mp3')

    thumb = None
    for t in info['thumbnails'][::-1]:
        width = t.get('width', 1080)
        if not 100 <= width <= 500:
            continue
        url_thumb = t['url']
        thumb = BytesIO()
        try:
            downloader.download(url_thumb, buffer=thumb)
            break
        except Exception as e:
            print(e)
            thumb = None
    self.thumb = thumb

    self._url = url_audio
    return self._url

def get_imgs_page(page, title, referer, session, cw):
    print_ = get_print(cw)

    #sleep(2)
    #html = downloader.read_html(page.url, referer, session=session)
    #soup = Soup(html)
    # 2183
    session, soup, page.url = get_soup(page.url, session)

    title_page = clean_title(soup.find('span', class_='page-desc').text.strip())
    if page.title != title_page:
        print_('{} -> {}'.format(page.title, title_page))
        page.title = title_page

    views = soup.findAll('div', class_='view-content')\
        + soup.findAll('div', class_='view-padding')
    if not views:
        raise Exception('no views')

    hash = re.find(r'''data_attribute *: *['"](.+?)['"]''', soup.html)
    print_('hash: {}'.format(hash))
    if hash is None:
        raise Exception('no hash')

    imgs = []
    for view in views:
        if view is None:
            continue
        for img in view.findAll('img'):
            if not isVisible(img):
                continue
            src = img.get('data-{}'.format(hash))
            src = src or img.get('content')  # https://manatoki77.net/comic/5266935
            if not src:
                continue
            img = urljoin(page.url, src)
            if '/img/cang' in img:
                continue
            if '/img/blank.gif' in img:
                continue
            img = Image(img, page, len(imgs))
            imgs.append(img)

    ## if not imgs:
    ##     raise Exception('no imgs')

    return imgs

def read(self):
    if '/video/' in self.url:
        res = clf2.solve(self.url, session=self.session, cw=self.cw)
        soup = Soup(res['html'])
        title = soup.find('h1', id='post_title').text.strip()
        self.title = title
        view = soup.find('div', id='post')
        video = view.find('video')
        src = video.find('source')['src']
        src = urljoin(self.url, src)
        video = Video(src, self.url, title, self.session)
        self.urls.append(video.url)
        self.single = True
        return

    if '/image/' not in self.url:
        raise NotImplementedError('Not a post')

    res = clf2.solve(self.url, session=self.session, cw=self.cw)
    soup = Soup(res['html'])
    title = soup.find('h2').text

    paginator = soup.find('div', id='paginator')
    pages = [self.url]
    for a in paginator.findAll('a'):
        href = a.get('href')
        if not href:
            continue
        href = urljoin(self.url, href)
        if href not in pages:
            pages.append(href)

    imgs = []
    for i, page in enumerate(pages):
        if page == self.url:
            soup_page = soup
        else:
            soup_page = downloader.read_soup(page, session=self.session)
        view = soup_page.find('div', id='post')
        for img in view.findAll('img'):
            href = img.parent['href']
            href = urljoin(page, href)
            img = Image(href, page, len(imgs), self.session)
            imgs.append(img)
        self.cw.setTitle('{} {} ({} / {})'.format(tr_('읽는 중...'), title, i+1, len(pages)))  # "Reading..."

    for img in imgs:
        self.urls.append(img.url)
    self.title = clean_title(title)

def id(self):
    if self.type_sankaku == 'www':
        id = '[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        if '/post/show/' in self.url:
            id = get_id(self.url)
        else:
            qs = query_url(self.url)
            tags = qs.get('tags', [])
            tags.sort()
            id = ' '.join(tags)
        if not id:
            id = 'N/A'
        id = '[{}] {}'.format(self.type_sankaku, id)
    return clean_title(id)

def read(self):
    outdir = get_outdir('kakuyomu')

    self.artist = self.info['artist']
    title_dir = clean_title(u'[{}] {}'.format(self.artist, self.info['title']))

    for page in self.info['pages']:
        file = os.path.join(outdir, title_dir, page.filename)
        if os.path.isfile(file):
            self.urls.append(file)
        else:
            self.urls.append(page.file)

    self.title = title_dir

def extract_section_paragraphs(paragraph, new_json, level=0):
    """
    Formulates the current section into a cleaner representation with rectified labels.

    :param paragraph: Current paragraph in the terms of service.
    :param new_json: The document where the new structure is stored for later output.
    :param level: Whether it is the first or second level heading.
    :return: The updated document.
    """
    title = clean_title(paragraph["section"][level], grouped_keys)
    text = clean_text(paragraph["text"])
    if title and text:
        new_json["level" + str(level + 1) + "_headings"].append({"section": title, "text": text})
    return new_json

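# Illustrative usage sketch (not from the original source): the dict shapes below are
# inferred from how extract_section_paragraphs() indexes its arguments, and the sample
# values are invented. clean_title, clean_text and grouped_keys are assumed to be the
# module-level helpers the function already relies on.
def _example_extract_section_paragraphs():
    new_json = {"level1_headings": [], "level2_headings": []}
    paragraph = {
        "section": ["Privacy", "Data retention"],
        "text": "We keep your data for 30 days.",
    }
    # level=0 targets "level1_headings", level=1 targets "level2_headings"; an entry is
    # appended only if both the rectified title and the cleaned text are non-empty.
    return extract_section_paragraphs(paragraph, new_json, level=0)
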
def __init__(self, filename_base: str, ly_filepath: str, pdf_base: str, toc_entry: str = None):
    self.filename_base = filename_base
    self.ly_filepath = ly_filepath
    self.pdf_base = pdf_base  # pdf path without file extension
    self.pdf_filepath = '{}.pdf'.format(pdf_base)
    if toc_entry:
        self.toc_entry = utils.clean_title(toc_entry)
    else:
        # If we didn't get an explicit ToC entry, just use the name of the pdf :-/
        self.toc_entry = self.pdf_filepath

def __init__(self, type, url, title, referer, p=0, multi_post=False):
    self.type = type
    self.url = LazyUrl(referer, lambda _: url, self)
    ext = get_ext(url)
    if ext.lower() == '.php':
        ext = '.mp4'
    if type == 'video':
        id_ = re.find('videos/([0-9a-zA-Z_-]+)', referer, err='no video id')
        self.filename = format_filename(title, id_, ext)  # 4287
    elif type == 'image':
        name = '{}_p{}'.format(clean_title(title), p) if multi_post else p
        self.filename = '{}{}'.format(name, ext)
    else:
        raise NotImplementedError(type)
    self.title = title

def read(self):
    cw = self.cw
    session = self.session
    videos = []

    tab = ''.join(self.url.replace('pornhubpremium.com', 'pornhub.com', 1).split('?')[0].split('#')[0].split('pornhub.com/')[-1].split('/')[2:3])

    if '/album/' in self.url:
        self.print_('Album')
        info = read_album(self.url, session=session)
        self.single = False
        for photo in info['photos']:
            self.urls.append(photo.url)
        self.title = clean_title(info['title'])
    elif '/photo/' in self.url:
        self.print_('Photo')
        info = read_photo(self.url, session=session)
        for photo in info['photos']:
            self.urls.append(photo.url)
        self.title = info['title']
    elif tab not in ['', 'videos']:
        raise NotImplementedError(tab)
    elif 'viewkey=' not in self.url.lower() and\
            '/embed/' not in self.url.lower() and\
            '/gif/' not in self.url.lower():
        self.print_('videos')
        info = get_videos(self.url, cw)
        hrefs = info['hrefs']
        self.print_('videos: {}'.format(len(hrefs)))
        if not hrefs:
            raise Exception('no hrefs')
        videos = [Video(href, cw, session) for href in hrefs]
        video = self.process_playlist(info['title'], videos)
        self.setIcon(video.thumb)
        self.enableSegment()
    else:
        video = Video(self.url, cw, session)
        video.url()
        self.urls.append(video.url)
        self.setIcon(video.thumb)
        self.title = video.title
        self.enableSegment()

def main():
    docs = {}
    batch = 1
    for fname in glob.glob('text/*/wiki*', recursive=True):
        print(fname)
        with open(fname) as f:
            in_doc = False
            cur_doc = {}
            cur_lines = []
            for line in f:
                if not in_doc:
                    if line.startswith('<doc id="'):
                        in_doc = True
                        doc_id, title = extract_title_id(line)
                        cur_doc['id'] = doc_id
                        cur_doc['title'] = clean_title(title)
                    continue
                if line.startswith('</doc>'):
                    doc_id = cur_doc['id']
                    del cur_doc['id']
                    text = ''.join(cur_lines)
                    cats = RE_CAT.findall(text)
                    cats = [c.split('|')[0].strip() for _, c, _ in cats]
                    if cats:
                        cur_doc['cats'] = cats
                    is_disam = any(disam in text for disam in DISAMS)
                    if is_disam:
                        cur_doc['dis'] = 1
                    docs[doc_id] = cur_doc
                    in_doc = False
                    cur_doc = {}
                    cur_lines = []
                else:
                    cur_lines.append(line)
        if len(docs) >= 100000:
            dump_to_json(docs, 'expanded/expanded_{}.json'.format(batch))
            docs = {}
            batch += 1
    if docs:
        dump_to_json(docs, 'expanded/expanded_{}.json'.format(batch))
        docs = {}

def parse_synopsis_doc(doc, db):
    parsed_doc = dict()
    parsed_doc['title'] = clean_title(doc['Title'])
    parsed_doc['link'] = doc['Link']
    parsed_doc['synopsis_link'] = doc['Synopsis_Link']
    parsed_doc['origin'] = "Scraper_public_health_ontario"
    parsed_doc['journal_string'] = doc['Journal_String'].strip(' \t\r.')
    parsed_doc['authors'] = doc["Authors"]
    parsed_doc['abstract'] = find_abstract(doc.get('Abstract'))

    paper_fs = gridfs.GridFS(db, collection='Scraper_publichealthontario_fs')
    pdf_file = paper_fs.get(doc['PDF_gridfs_id'])
    # with open('example.pdf', 'wb') as f:
    #     f.write(pdf_file.read())
    # pdf_file.seek(0)
    try:
        paragraphs = extract_paragraphs_pdf(BytesIO(pdf_file.read()), return_dicts=True, only_printable=True)
    except Exception as e:
        print('Failed to extract PDF %s(%r) (%r)' % (doc['Doi'], doc['PDF_gridfs_id'], e))
        traceback.print_exc()
        paragraphs = []

    sections = {}
    last_sec = None
    for p in paragraphs:
        # Heuristic: a text box taller than 18 and narrower than 230 units is treated as a section heading.
        is_heading = 18 < p['bbox'][3] - p['bbox'][1] and p['bbox'][2] - p['bbox'][0] < 230
        if is_heading:
            last_sec = p['text'].lower()
            sections[last_sec] = []
        elif last_sec is not None:
            sections[last_sec].append(p)

    parsed_doc['synopsis'] = {
        'summary': sections.get('one-minute summary', None),
        'additional_info': sections.get('additional information', None),
        'pho_reviewer_comments': sections.get('pho reviewers comments', None),
    }
    if all(x is None for x in parsed_doc['synopsis'].values()):
        parsed_doc['synopsis'] = None
    return parsed_doc

def convert_biorxiv_to_vespa(doc, db):
    # paper_fs = gridfs.GridFS(db, collection='Scraper_connect_biorxiv_org_fs')
    # pdf_file = paper_fs.get(doc['PDF_gridfs_id'])
    # parsed_content = try_parse_pdf_hierarchy(pdf_file)
    parsed_content = {}
    parsed_doc = {
        'title': clean_title(doc['Title']),
        '_id': doc['_id'],
        'source': doc['Journal'],
        'license': doc['Journal'],
        'datestring': doc['Publication_Date'].strftime('%Y-%m-%d'),
        'doi': doc['Doi'],
        'url': doc['Link'],
        'cord_uid': None,
        'authors': [],
        'bib_entries': None,
        'abstract': ' '.join(doc['Abstract']),
        'journal': doc['Journal'],
        'body_text': parsed_content.get('body', None),
        'conclusion': parsed_content.get('conclusion', None),
        'introduction': parsed_content.get('introduction', None),
        'results': parsed_content.get('result', None),
        'discussion': parsed_content.get('discussion', None),
        'methods': parsed_content.get('method', None),
        'background': parsed_content.get('background', None),
        'timestamp': int(doc['Publication_Date'].timestamp()),
        'pmcid': None,
        'pubmed_id': None,
        'who_covidence': None,
        'has_full_text': len(parsed_content.get('body', '')) > 0,
        'dataset_version': datetime.now().timestamp(),
    }
    for person in doc["Authors"]:
        parsed_doc['authors'].append({
            'first': person['Name']['fn'],
            'last': person['Name']['ln'],
            'name': f'{person["Name"]["fn"]} {person["Name"]["ln"]}'
        })
    return parsed_doc

def name(self):
    if self._name is None:
        parsed_url = urlparse(self.url)
        qs = parse_qs(parsed_url.query)
        if 'donmai.us/favorites' in self.url:
            id = qs.get('user_id', [''])[0]
            print('len(id) =', len(id), u'"{}"'.format(id))
            assert len(id) > 0, '[Fav] User id is not specified'
            id = u'fav_{}'.format(id)
        else:
            tags = qs.get('tags', [])
            tags.sort()
            id = u' '.join(tags)
        if not id:
            id = u'N/A'
        self._name = id
    return clean_title(self._name)

def read(self):
    checkLogin(self.session)

    uid, oid, name = get_id(self.url, self.cw)
    title = clean_title('{} (weibo_{})'.format(name, uid))

    for img in get_imgs(uid, oid, title, self.session, cw=self.cw, d=self, parent=self.mainWindow):
        self.urls.append(img.url)
        self.filenames[img.url] = img.filename

    self.title = title

def main(data_dir):
    redirects = {}
    batch = 1
    for fname in glob.glob(data_dir + '/*/wiki*', recursive=False):
        print(fname)
        with open(fname) as f:
            in_doc = False
            cur_doc = {}
            cur_lines = []
            for line in f:
                if not in_doc:
                    if line.startswith('<doc id="'):
                        in_doc = True
                        doc_id, title = extract_title_id(line)
                        cur_doc['id'] = doc_id
                        cur_doc['title'] = clean_title(title)
                    continue
                if line.startswith('</doc>'):
                    doc_id = cur_doc['id']
                    del cur_doc['id']
                    for cur_line in cur_lines:
                        m = RE_REDIRECT.search(cur_line)
                        if m:
                            cur_doc['redirect'] = m.group(1)
                            break
                    if 'redirect' in cur_doc:
                        redirects[doc_id] = cur_doc
                    in_doc = False
                    cur_doc = {}
                    cur_lines = []
                else:
                    cur_lines.append(line)
        if len(redirects) >= 100000:
            dump_to_json(redirects, 'expanded/expanded_{}.json'.format(batch))
            redirects = {}
            batch += 1
    if redirects:
        dump_to_json(redirects, 'expanded/expanded_{}.json'.format(batch))
        redirects = {}

def read(self):
    file = None
    files = None
    title = None

    if '/users/' in self.url or '/user/' in self.url:
        type_ = 'videos'
        try:
            if self.url.split('/users/')[1].split('/')[1] == 'images':
                type_ = 'images'
        except:
            pass
        info = read_channel(self.url, type_, self.session, self.cw)
        title = info['title']
        urls = info['urls']
        if type_ == 'videos':
            files = [LazyFile(url, type_, self.session) for url in urls]
            file = self.process_playlist('[Channel] [{}] {}'.format(type_.capitalize(), title), files)
        elif type_ == 'images':  # 4499
            files = []
            for i, url in enumerate(urls):
                check_alive(self.cw)
                files += get_files(url, self.session, multi_post=True, cw=self.cw)  # 4728
                self.title = '{} {} - {} / {}'.format(tr_('읽는 중...'), title, i, len(urls))  # "Reading..."
            title = '[Channel] [{}] {}'.format(type_.capitalize(), title)
        else:
            raise NotImplementedError(type_)

    if file is None:
        if files is None:
            files = get_files(self.url, self.session, cw=self.cw)
        for file in files:
            self.urls.append(file.url)
        file = files[0]
        if file.type == 'youtube':
            raise errors.Invalid('[iwara] Youtube: {}'.format(self.url))
        if file.type == 'image':
            self.single = False
        title = title or file.title

    if not self.single:
        title = clean_title(title)
    self.title = title
    if file.thumb is not None:
        self.setIcon(file.thumb)
