def get_imgs(url, soup=None, cw=None):
    """Collect image objects for every selected page of a comicwalker title.

    Fetches the page list, filters it through the page selector, reuses
    already-downloaded pages, and returns early (None) if the UI task dies.

    BUG FIX: the original passed the misspelled name ``hrml`` to ``Soup``,
    which raised NameError whenever ``soup`` was not supplied.
    """
    if soup is None:
        html = downloader.read_html(url)
        soup = Soup(html)  # was Soup(hrml) — undefined name
    title = get_title(soup, cw)
    pages = get_pages(url, soup)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        # Reuse images already on disk for this page, if any.
        imgs_already = get_imgs_already('comicwalker', title, page, cw)
        if imgs_already:
            imgs += imgs_already
            continue
        if cw is not None:
            if not cw.alive:
                return  # task cancelled — abort without a result
            cw.setTitle(u'{} {} / {} ({} / {})'.format(
                tr_(u'읽는 중...'), title, page.title, i + 1, len(pages)))
        imgs += get_imgs_page(page)
    return imgs
def get_imgs(url, title, soup=None, session=None, cw=None):
    """Collect images for every selected page of a manatoki title.

    Re-resolves the soup/session when either is missing, skips pages that
    are already downloaded, and keeps the UI title updated; returns None
    early if the UI task is no longer alive.
    """
    log = get_print(cw)
    if soup is None or session is None:
        session, soup, url = get_soup(url, session)
    pages = page_selector.filter(get_pages(url, soup), cw)
    imgs = []
    total = len(pages)
    for index, page in enumerate(pages):
        cached = get_imgs_already('manatoki', title, page, cw)
        if cached:
            imgs += cached
            continue
        page_imgs = get_imgs_page(page, title, url, session, cw)
        imgs += page_imgs
        status = '{} {} / {} ({} / {})'.format(
            tr_('읽는 중...'), title, page.title, index + 1, total)
        log('{} {}'.format(page.title, len(page_imgs)))
        if cw is None:
            print('read page... {} ({})'.format(page.url, len(imgs)))
        else:
            if not cw.alive:
                return
            cw.setTitle(status)
    return imgs
def get_imgs(url, title, soup=None, session=None, cw=None, pages=None):
    """Collect images for selected manamoa pages, reusing finished folders.

    When a page's output folder is already complete (``SKIP`` and
    ``isDoneFolder``), its files are appended from disk instead of being
    re-downloaded. Returns None early if the UI task dies.

    BUG FIX: ``cw.print_`` was called without checking ``cw is not None``,
    crashing when no UI worker was supplied (``cw`` defaults to None and
    every other branch guards it). Also renamed local ``dir`` so it no
    longer shadows the builtin.
    """
    if soup is None or session is None:
        data = get_soup(url, cw=cw)
        soup, session = data['soup'], data['session']
    if pages is None:
        pages = get_pages(soup, url, cw)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        dir_ = os.path.join(get_outdir('manamoa'), title, page.title)
        print('test dir:', dir_)
        if SKIP and isDoneFolder(dir_, page, cw=cw):
            msg = (u'Skip: {}').format(page.title)
            if cw is not None:  # guard: cw may be None
                cw.print_(msg)
            else:
                print(msg)
            # Reuse the files already present in the finished folder.
            for img in sorted(os.listdir(dir_)):
                imgs.append(os.path.join(dir_, img))
            continue
        if cw is not None:
            if not cw.alive:
                return  # task cancelled
            cw.setTitle((u'{} {} / {} ({} / {})').format(
                tr_(u'\uc77d\ub294 \uc911...'), title, page.title, i + 1,
                len(pages)))
        imgs += get_imgs_page(page, session, cw)
    return imgs
def get_imgs(url, title, session, soup=None, cw=None):
    """Gather the images of a jmana title across all selected pages.

    Raises Exception('no imgs') when nothing was collected; returns None
    early if the UI task dies.
    """
    log = get_print(cw)
    if soup is None:
        markup = downloader.read_html(url, session=session)
        soup = Soup(markup)
    pages = get_pages(url, soup=soup)
    log('pages: {}'.format(len(pages)))
    pages = page_selector.filter(pages, cw)
    imgs = []
    for index, page in enumerate(pages):
        cached = get_imgs_already('jmana', title, page, cw)
        if cached:
            imgs += cached
            continue
        imgs += get_imgs_page(page, url, session, cw)
        if cw is not None:
            if not cw.alive:
                return
            cw.setTitle((u'{} {} / {} ({} / {})').format(
                tr_(u'\uc77d\ub294 \uc911...'), title, page.title,
                index + 1, len(pages)))
    if not imgs:
        raise Exception('no imgs')
    return imgs
def get_imgs(url, soup=None, session=None, cw=None):
    """Collect images for all selected manatoki pages.

    Pages whose output folder is already non-empty (``SKIP`` and
    ``size_folder``) are filled from disk instead of re-downloaded.
    Returns None early if the UI task dies.
    """
    log = get_print(cw)
    if soup is None or session is None:
        session, soup = get_soup(url)
    pages = page_selector.filter(get_pages(url, soup), cw)
    title = get_title(soup)
    imgs = []
    for index, page in enumerate(pages):
        page_dir = os.path.join(get_outdir('manatoki'), title, page.title)
        print('test dir:', page_dir)
        if SKIP and size_folder(page_dir) > 0:
            log('Skip: {}'.format(page.title))
            # Reuse whatever files are already in the folder.
            for name in sorted(os.listdir(page_dir)):
                imgs.append(os.path.join(page_dir, name))
            continue
        page_imgs = get_imgs_page(page, url, session, cw)
        imgs += page_imgs
        status = '{} {} / {} ({} / {})'.format(
            tr_('읽는 중...'), title, page.title, index + 1, len(pages))
        log('{} {}'.format(page.title, len(page_imgs)))
        if cw is None:
            print('read page... {} ({})'.format(page.url, len(imgs)))
        else:
            if not cw.alive:
                return
            cw.setTitle(status)
    return imgs
def get_imgs_all(url, title, cw=None):
    """Accumulate images across every selected page of a webtoon title.

    Stops early (returning what was gathered so far) if the UI task dies.
    """
    pages = page_selector.filter(get_pages(url), cw)
    imgs = []
    count = len(pages)
    for index, page in enumerate(pages):
        cached = get_imgs_already('webtoon', title, page, cw)
        if cached:
            imgs += cached
            continue
        imgs += get_imgs(page)
        msg = tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(
            title, page.title, index + 1, count)
        if cw is None:
            print(msg)
        else:
            cw.setTitle(msg)
            if not cw.alive:
                break
    return imgs
def get_imgs_all(info, title, session, cw=None):
    """Collect images for every selected daumtoon page.

    Non-free episodes are skipped outright; stops early (with a partial
    result) if the UI task dies.
    """
    pages = page_selector.filter(info['pages'], cw)
    imgs = []
    for index, page in enumerate(pages):
        if page.serviceType != 'free':
            continue  # paid episode — cannot download
        cached = get_imgs_already('daumtoon', title, page, cw)
        if cached:
            imgs += cached
            continue
        imgs += get_imgs(page, session, cw)
        if cw is not None:
            cw.setTitle(
                tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(
                    title, page.title, index + 1, len(pages)))
            if not cw.alive:
                break
    return imgs
def get_imgs(url, title, soup=None, session=None, cw=None):
    """Collect images for every selected page of a pixiv_comic title.

    Reuses already-downloaded pages and returns None early if the UI
    task dies.
    """
    if soup is None:
        soup = get_soup(url, cw=cw)
    if session is None:
        session = Session()
        # NOTE(review): result is unused — presumably this request primes the
        # fresh session (cookies); confirm before removing.
        html = read_html(url, session=session)
    pages = get_pages(soup, url)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        # Reuse images already on disk for this page, if any.
        imgs_already = get_imgs_already('pixiv_comic', title, page, cw)
        if imgs_already:
            imgs += imgs_already
            continue
        if cw is not None:
            if not cw.alive:
                return  # task cancelled
            cw.setTitle((u'{} {} / {} ({} / {})').format(tr_(u'\uc77d\ub294 \uc911...'), title, page.title, i + 1, len(pages)))
        imgs += get_imgs_page(page, session)
    return imgs
def get_imgs(url, title, session, soup=None, cw=None):
    """Read every selected page of the title and accumulate its images.

    Returns None early if the UI task dies; otherwise logs progress to
    stdout when no UI worker is attached.
    """
    if soup is None:
        markup = downloader.read_html(url, session=session)
        soup = Soup(markup)
    pages = page_selector.filter(get_pages(url, session, soup, cw), cw)
    imgs = []
    for index, page in enumerate(pages):
        imgs += get_imgs_page(page, session, cw)
        status = u'{} {} / {} ({} / {})'.format(
            tr_(u'읽는 중...'), title, page.title, index + 1, len(pages))
        if cw is None:
            print(status)
        else:
            if not cw.alive:
                return
            cw.setTitle(status)
    return imgs
def get_imgs_all(url, title, cw=None):
    """Fetch the image list of every selected navertoon page.

    Stops early (returning a partial result) if the UI task dies.
    """
    log = get_print(cw)
    info, pages = get_pages(url, cw)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for index, page in enumerate(pages):
        cached = get_imgs_already('navertoon', title, page, cw)
        if cached:
            imgs += cached
            continue
        fresh = get_imgs(page, cw)
        log('{}: {}'.format(page.title, len(fresh)))
        imgs += fresh
        if cw is not None:
            cw.setTitle(
                tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(
                    title, page.title, index + 1, len(pages)))
            if not cw.alive:
                break
    return imgs
def get_imgs_all(info, title, session, cw=None):
    """Collect images for every selected daumtoon page.

    Pages behind a paywall raise NotPaidError and are logged and skipped;
    check_alive aborts the loop if the UI task dies.
    """
    log = get_print(cw)
    pages = page_selector.filter(info['pages'], cw)
    imgs = []
    for index, page in enumerate(pages):
        cached = get_imgs_already('daumtoon', title, page, cw)
        if cached:
            imgs += cached
            continue
        try:
            imgs += get_imgs(page, session, cw)
        except NotPaidError:
            log('Not paid: {}'.format(page.title))  #3314
            continue
        if cw is not None:
            cw.setTitle(
                tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(
                    title, page.title, index + 1, len(pages)))
        check_alive(cw)
    return imgs
def get_info(url, session, cw=None):
    """Build the info dict for a kakaopage title.

    Returns a dict with 'artist', 'title_raw', 'title' and 'imgs'.
    Raises Exception when there are no pages or no images; returns None
    early if the UI task dies.
    """
    print_ = get_print(cw)
    pages = get_pages(url, session)
    pages = page_selector.filter(pages, cw)
    if not pages:
        raise Exception('no pages')
    info = {}
    html = read_html(url, session=session)
    soup = Soup(html)
    __NEXT_DATA__ = soup.find('script', id='__NEXT_DATA__')
    if __NEXT_DATA__:
        # Pull the tid out of the Next.js payload, set it as the _kptid
        # cookie, then re-fetch the page with that cookie in place.
        data = json.loads(__NEXT_DATA__.string)
        tid = data['props']['initialState']['common']['constant']['tid']
        print_('tid: {}'.format(tid))
        session.cookies['_kptid'] = tid
        html = read_html(url, session=session)
        soup = Soup(html)
    title = soup.find('h2').text.strip()
    artist = soup.find('meta', {'name': 'author'})['content']
    # Normalize the author list: collapse space-padded commas, then
    # re-expand every comma to ', '.
    for x in [' ,', ', ']:
        while x in artist:
            artist = artist.replace(x, ',')
    artist = artist.replace(',', ', ')
    info['artist'] = artist
    info['title_raw'] = title
    info['title'] = clean_title('[{}] {}'.format(artist, title))
    imgs = []
    for i, page in enumerate(pages):
        if cw is not None:
            if not cw.alive:
                return  # task cancelled
            cw.setTitle('{} {} / {} ({} / {})'.format(tr_('읽는 중...'), info['title'], page.title, i + 1, len(pages))) #3463
        # Reuse images already on disk for this page, if any.
        imgs_already = get_imgs_already('kakaopage', info['title'], page, cw)
        if imgs_already:
            imgs += imgs_already
            continue
        # Best-effort per page: a failing page is logged, not fatal.
        try:
            _imgs = get_imgs_page(page, session)
            e_msg = None
        except Exception as e:
            _imgs = []
            e_msg = print_error(e)[0]
        print_('{} {}'.format(page.title, len(_imgs)))
        if e_msg:
            print_(e_msg)
        imgs += _imgs
        sleep(.2)  # throttle between page requests
    if not imgs:
        raise Exception('no imgs')
    info['imgs'] = imgs
    return info