def get_videos(url, cw=None):
    print_ = get_print(cw)
    info = {}
    user_id = re.find(r'twitch.tv/([^/?]+)', url, err='no user_id')
    print(user_id)
    session = Session()
    r = session.get(url)
    s = cut_pair(re.find(r'headers *: *({.*)', r.text, err='no headers'))
    print(s)
    headers = json_loads(s)

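    # Twitch GQL: ClipsCards__User is a persisted query, identified only by
    # its sha256Hash; pagination works by feeding back the 'cursor' below.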
    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {
            'login': user_id,
            'limit': 20,
            'criteria': {
                'filter': 'ALL_TIME'
            }
        },
        'extensions': {
            'persistedQuery': {
                'version': 1,
                'sha256Hash': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
            }
        },
    }]
    videos = []
    cursor = None
    cursor_new = None
    while True:
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        r = session.post('https://gql.twitch.tv/gql',
                         json=payload,
                         headers=headers)
        #print(r)
        data = r.json()
        for edge in data[0]['data']['user']['clips']['edges']:
            url_video = edge['node']['url']
            info['name'] = edge['node']['broadcaster']['displayName']
            video = Video(url_video)
            video.id = int(edge['node']['id'])
            videos.append(video)
            cursor_new = edge['cursor']
        print_('videos: {} / cursor: {}'.format(len(videos), cursor))
        if cursor == cursor_new:
            print_('same cursor')
            break
        if cursor_new is None:
            break
        cursor = cursor_new
    if not videos:
        raise Exception('no videos')
    info['videos'] = sorted(videos, key=lambda video: video.id, reverse=True)
    return info
def get_imgs(id, title, session, type=None, cw=None):
    print_ = get_print(cw)
    if type is None:
        type = 'projects'
    referer = 'https://www.artstation.com/{}'.format(id)
    html = downloader.read_html(referer, session=session)
    print(session.cookies.keys())
    datas = []
    p = 1
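    # ArtStation lists a user's works at /users/{id}/{type}.json?page=N;
    # an empty 'data' array marks the last page.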
    while p < 1000:
        url = 'https://www.artstation.com/users/{}/{}.json?page={}'.format(
            id, type, p)
        print(url)
        for try_ in range(4):
            try:
                html = downloader.read_html(url,
                                            session=session,
                                            referer=referer)
                break
            except Exception as e:
                e_ = e
                print(e)
        else:
            raise e_

        j = json.loads(html)
        data = j['data']
        if not data:
            break
        datas += data
        if cw:
            if not cw.alive:
                return []
            cw.setTitle('{}  {} - {}'.format(tr_('페이지 읽는 중...'), title,
                                             len(datas)))  # 'Reading pages...'
        else:
            print(len(datas))
        p += 1

    imgs = []
    for data in datas:
        date = data['created_at'][2:10]
        post_url = data['permalink']
        print('post_url', post_url)
        id_art = get_id_art(post_url)
        imgs += get_imgs_page(id_art, session, date=date, cw=cw)
        if cw:
            if not cw.alive:
                return []
            cw.setTitle('{}  {} - {}'.format(tr_('이미지 읽는 중...'), title,
                                             len(imgs)))  # 'Reading images...'
        else:
            print(len(imgs))

    return imgs
def get_imgs_page(id_art, session, date=None, cw=None):
    print_ = get_print(cw)
    url_json = 'https://www.artstation.com/projects/{}.json'.format(id_art)
    post_url = 'https://www.artstation.com/artwork/{}'.format(id_art)
    try:
        html = downloader.read_html(url_json,
                                    session=session,
                                    referer=post_url)
        data = json.loads(html)
        imgs_ = data['assets']
    except Exception as e:
        print_(print_error(e)[-1])
        return []

    if date is None:
        date = data['created_at'][2:10]

    imgs = []
    for page, img in enumerate(imgs_):
        if not img['has_image']:
            print('no img')
            continue
        url = None
        video = None
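        # 'player_embedded' holds an iframe; its page may expose a direct
        # <video> source, or (for YouTube embeds) only a canonical link.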
        embed = img.get('player_embedded')
        if embed:
            soup = Soup(embed)
            url_embed = soup.find('iframe').attrs['src']
            print_('embed: {}'.format(url_embed))
            try:
                html = downloader.read_html(url_embed,
                                            session=session,
                                            referer=post_url)
                soup = Soup(html)
                url = soup.find('video').find('source').attrs['src']
            except Exception:
                pass  # no direct <video> source; may be a YouTube embed
            if not url:
                try:
                    url = soup.find('link', {'rel': 'canonical'}).attrs['href']
                    print_('YouTube: {}'.format(url))
                    raise Exception('YouTube')
##                    from extractor import youtube_downloader
##                    video = youtube_downloader.Video(url, cw=cw)
                except Exception as e:
                    print(e)
                    url = None
        if not url:
            url = img['image_url']
        if video:
            img = video
        else:
            img = Image(post_url, date, url, page)

        img.data = data
        imgs.append(img)

    return imgs
def get_imgs_from_illust(illust, api=None, types={'illust', 'manga', 'ugoira'}, format=None, format_name=None, dir='', print_=None, cw=None):
    print('get_imgs_from_illust', api, types, format, format_name, dir)
    print_ = get_print(cw)
    if api is None:
        api = pixiv_auth.get_api()
    if types is not None and illust.get('type', 'illust') not in types:
        return []
    imgs = []
    if illust.type == 'ugoira':
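        # Pixiv ugoira: the animation is delivered as a zip of frames plus
        # frame-delay metadata, fetched via ugoira_metadata below.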
        sleep(0.2)
        for try_ in range(N_TRY):
            print_('read ugoira... {}'.format(illust.id))
            error = None
            try:
                ugoira_data = api.ugoira_metadata(illust.id, req_auth=True)
                error = ugoira_data.get('error')
                if error:
                    raise PixivError(error)
                break
            except PixivError as e:
                api = e.api
                print_(e)
                e_ = e
                msg = (error or {}).get('user_message', '')
                if u'公開制限エラー' in msg:  # 'publication restricted' error
                    print_('invalid ugoira; ignore')
                    return []
                if u'該当作品の公開レベルにより閲覧できません' in msg:  # 'not viewable at this publication level'
                    print_('invalid ugoira (2); ignore')
                    return []
                if try_ < N_TRY - 1:
                    print_('retry...')
                sleep(SLEEP, cw)
        else:
            raise e_

        ugoira_data = ugoira_data.ugoira_metadata
        url = ugoira_data.zip_urls.medium.replace('600x600', '1920x1080')  # full-size zip
        img = Img(illust, url, ugoira_data=ugoira_data, format_name=format_name)
        if format is not None:
            filename = os.path.join(dir, img.filename)
            filename = os.path.splitext(filename)[0] + '.' + format
            filename_old = os.path.join(dir, '{}_ugoira1920x1080.{}'.format(img.id, format))
            if os.path.isfile(filename_old) and not os.path.isfile(filename):
                print_(u'rename: {} -> {}'.format(os.path.basename(filename_old), os.path.basename(filename)))
                os.rename(filename_old, filename)
            if os.path.isfile(filename):
                print_(u'skip ugoira: {}'.format(filename))
                img = Img(illust, filename, ugoira_data=ugoira_data, format_name=format_name)
        imgs.append(img)
    elif illust.page_count == 1:
        img = Img(illust, illust.meta_single_page.original_image_url, format_name=format_name)
        imgs.append(img)
    else:
        pages = illust.meta_pages
        for page in pages:
            img = Img(illust, page.image_urls.original, format_name=format_name)
            imgs.append(img)

    return imgs
def get_imgs(url, title=None, cw=None):
    url = clean_url(url)
    if 's=view' in url and 'page=favorites' not in url:
        raise NotImplementedError('Not Implemented')

    tags = get_tags(url)
    tags = quote(tags, safe='/')
    tags = tags.replace('%20', '+')
    url = 'https://gelbooru.com/index.php?page=post&s=list&tags={}'.format(tags)

    print_ = get_print(cw)

    # Range
    max_pid = get_max_range(cw)

    imgs = []
    ids = set()
    count_no_imgs = 0
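    # Gelbooru pagination: setPage offsets by the number of ids collected so
    # far; two consecutive empty pages end the crawl.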
    for p in range(500): #1017
        url = setPage(url, len(ids))
        print_(url)
        html = downloader.read_html(url)

        soup = Soup(html)
        posts = soup.findAll(class_='thumbnail-preview')
        imgs_new = []
        for post in posts:
            id_ = int(re.find('[0-9]+', post.find('a')['id'], err='no id'))
            if id_ in ids:
                print('duplicate:', id_)
                continue
            ids.add(id_)
            url_img = urljoin(url, post.find('a')['href'])
            img = Image(id_, url_img)
            imgs_new.append(img)
        if imgs_new:
            imgs += imgs_new
            count_no_imgs = 0
        else:
            print('no imgs')
            count_no_imgs += 1
            if count_no_imgs > 1:
                print('break')
                break
            
        if len(imgs) >= max_pid:
            break

        if cw is not None:
            if not cw.alive:
                break
            cw.setTitle(u'{}  {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs)))  # 'Reading...'

    if not imgs:
        raise Exception('no imgs')
            
    return imgs
def extract(name, html, cw=None):
    print_ = get_print(cw)
    value = re.find(r'''{} *= *['"](.*?)['"]'''.format(name), html)
    if value is None:
        value_raw = re.find(r'''{} *= *(\[.*?\])'''.format(name), html)
        value = json.loads(value_raw) if value_raw is not None else None
    print_('{}: {}'.format(name, value))
    if value is None:
        raise Exception('No {}'.format(name))
    return value
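# e.g. extract('vid', "vid = 'x1'") -> 'x1'; extract('ids', 'ids = [1, 2]') -> [1, 2]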
def print_streams(streams, cw):
    print_ = get_print(cw)

    for stream in streams:
        print_(u'[{}][{}fps][{}{}][{}] {} [{} / {}] ─ {}'.format(
            stream.resolution, stream.fps, stream.abr_str,
            '(fixed)' if stream.abr_fixed else '', stream.tbr, stream.subtype,
            stream.video_codec, stream.audio_codec, stream.format))
    print_('')
def get_sd(url, session=None, html=None, cw=None, wait=True):
    print_ = get_print(cw)

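    # Instagram embeds its page state as JSON (window._sharedData) in a
    # <script> tag; get_j extracts it, and any __additionalDataLoaded
    # payloads are merged in afterwards.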
    if html:
        soup = Soup(html)
        check_error(soup, cw, wait)
        for script in soup.findAll('script'):
            j = get_j(script)
            if j:
                break
        else:
            raise Exception('no _sharedData!!')
    else:
        for try_ in range(4):
            _wait(cw)
            html = read_html(url, session, cw)
            soup = Soup(html)
            check_error(soup, cw, wait)
            for script in soup.findAll('script'):
                j = get_j(script)
                if j:
                    break
            else:
                continue
            break
        else:
            raise Exception('no _sharedData')
    for script in soup.findAll('script'):
        s = script.string
        if s and 'window.__additionalDataLoaded(' in s:
            s = cut_pair(s)
            j_add = json.loads(s)
            try:
                j['entry_data']['PostPage'][0].update(j_add)
            except KeyError:
                j['entry_data']['ProfilePage'][0].update(j_add)  #2900

    # Challenge
    challenge = j['entry_data'].get('Challenge')
    if challenge:
        try:
            for cont in challenge[0]['extraData']['content']:
                title = cont.get('title')
                if title:
                    break
            else:
                raise Exception('no title')
        except Exception:
            title = 'Err'
        raise errors.LoginRequired(title)

    # LoginAndSignupPage
    login = j['entry_data'].get('LoginAndSignupPage')
    if login:
        raise errors.LoginRequired()

    return j
    def get(self, url):
        print_ = get_print(self.cw)
        if self._url:
            return self._url
        ydl = ytdl.YoutubeDL(cw=self.cw)
        try:
            info = ydl.extract_info(url)
        except Exception as e:
            ex = type(ytdl.get_extractor(url))(ydl)
            _download_info = getattr(ex, '_download_info', None)
            if _download_info is not None:
                vod_id = ex._match_id(url)
                info = _download_info(vod_id)
                print_(info)
            if 'HTTPError 403' in str(e):
                raise errors.LoginRequired()
            raise

        def print_video(video):
            print_('[{}] [{}] [{}] {}'.format(video['format_id'],
                                              video.get('height'),
                                              video.get('tbr'), video['url']))

        videos = [video for video in info['formats'] if video.get('height')]

        videos = sorted(videos,
                        key=lambda video:
                        (video.get('height', 0), video.get('tbr', 0)),
                        reverse=True)

        for video in videos:
            print_video(video)

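        # Pick the best format at or below the user's resolution cap (#3723);
        # if nothing qualifies, fall back to the lowest available.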
        for video in videos:
            if video.get('height', 0) <= get_resolution():  #3723
                video_best = video
                break
        else:
            video_best = videos[-1]
        print_video(video_best)

        video = video_best['url']

        ext = get_ext(video)
        self.title = info['title']
        id = info['display_id']

        if ext.lower() == '.m3u8':
            video = M3u8_stream(video, n_thread=4, alter=alter)
            ext = '.mp4'
        self.filename = format_filename(self.title, id, ext)
        self.url_thumb = info['thumbnail']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)
        self._url = video
        return self._url
def get_files(url, session, multi_post=False, cw=None):
    print_ = get_print(cw)
    html = read_html(url, session=session)
    soup = Soup(html)
    h = soup.find('h1', class_='title')
    content = h.parent.parent.parent
    title = h.text.strip()
    youtube = content.find('div', class_='embedded-video')
    video = content.find('video')
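    # Classify the post: an embedded YouTube iframe, a native <video>, or
    # plain images linked under /files/.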
    if youtube:
        type = 'youtube'
    elif video:
        type = 'video'
    else:
        type = 'image'
    print_('type: {}'.format(type))
    files = []
    if type == 'image':
        urls = set()
        for img in content.findAll('img'):
            img = urljoin(url, img.parent.attrs['href'])
            if '/files/' not in img:
                continue
            if img in urls:
                print('duplicate')
                continue
            urls.add(img)
            file = File(type,
                        img,
                        title,
                        url,
                        len(files),
                        multi_post=multi_post)
            files.append(file)

    elif type == 'youtube':
        src = urljoin(url, youtube.find('iframe').attrs['src'])
        file = File(type, src, title, url)
        files.append(file)
    elif type == 'video':
        url_thumb = urljoin(url, video.attrs['poster'])
        print('url_thumb:', url_thumb)
        id = re.find('videos/([0-9a-zA-Z_-]+)', url, err='no video id')
        url_data = urljoin(url, '/api/video/{}'.format(id))
        s_json = read_html(url_data, url, session=session)
        data = json.loads(s_json)
        video = data[0]
        url_video = urljoin(url, video['uri'])
        file = File(type, url_video, title, url)
        file.url_thumb = url_thumb
        file.thumb = BytesIO()
        downloader.download(url_thumb, buffer=file.thumb, referer=url)
        files.append(file)
    else:
        raise NotImplementedError(type)
    return files
def get_video(session, id, cw=None):
    print_ = get_print(cw)

    try:
        info = nndownload.request_video(session, id)
    except Exception as e:
        raise Exception('Err: {}'.format(e))
    video = Video(session, info)

    return video
def get_imgs_more(username, session, title, types, n=None, format='[%y-%m-%d] id_ppage', cw=None, mode='media', method='tab', imgs=None):
    print_ = get_print(cw)
    imgs = imgs or []
    print_('imgs: {}, types: {}'.format(len(imgs), ', '.join(types)))

    artist, username = get_artist_username(username, session)
    
    # Range
    n = max(n or 0, get_max_range(cw))

    ids_set = set(img.id for img in imgs)

    count_no_imgs = 0

    filter_ = '' if options.get('experimental') else ' filter:media' #2687

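    # Page backwards through the timeline: each search is capped at
    # max_id = (smallest id seen) - 1, so results never repeat.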
    while len(imgs) < n:
        if ids_set:
            max_id = min(ids_set) - 1
            q = 'from:{} max_id:{} exclude:retweets{} -filter:periscope'.format(username, max_id, filter_)
        else:
            q = 'from:{} exclude:retweets{} -filter:periscope'.format(username, filter_)
        print(q)

        tweets = []
        for tweet in list(TwitterAPI(session, cw).search(q)):
            id = int(tweet['id'])
            if id in ids_set:
                print_('duplicate: {}'.format(id))
                continue
            ids_set.add(id)
            tweets.append(tweet)
            
        if tweets:
            count_no_imgs = 0
        else:
            count_no_imgs += 1
            change_ua(session)
            if count_no_imgs >= 3:
                break
            print_('retry...')
            continue
        
        for tweet in tweets:
            imgs += get_imgs_from_tweet(tweet, session, types, format, cw)

        msg = '{}  {} (@{}) - {}'.format(tr_('읽는 중...'), artist, username, len(imgs))
        if cw and not cw.alive:
            break
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    return imgs
def get_imgs(page, cw=None):
    print_ = get_print(cw)
    html = downloader.read_html(page.url)
    soup = Soup(html)

    type_ = re.find('''webtoonType *: *['"](.+?)['"]''', html)
    print_('type: {}'.format(type_))

    imgs = []
    if type_ == 'DEFAULT':  # https://m.comic.naver.com/webtoon/detail.nhn?titleId=715772
        view = soup.find('div', class_='toon_view_lst')
        for img in view.findAll('img'):
            img = img.attrs.get('data-src')
            if not img:
                continue
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    elif type_ == 'CUTTOON':  # https://m.comic.naver.com/webtoon/detail.nhn?titleId=752803
        view = soup.find('div', class_='swiper-wrapper')
        for div in view.findAll('div', class_='swiper-slide'):
            if div.parent != view:
                continue
            if div.find('div', class_='cut_viewer_last'):
                print('cut_viewer_last')
                continue
            if div.find('div', class_='cut_viewer_recomm'):
                print('cut_viewer_recomm')
                continue
            img = div.find('img')
            img = img.attrs['data-src']
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    elif type_ == 'EFFECTTOON':  #2313; https://m.comic.naver.com/webtoon/detail.nhn?titleId=670144
        img_base = re.find('''imageUrl *: *['"](.+?)['"]''', html) + '/'
        print('img_base:', img_base)
        url_api = re.find('''documentUrl *: *['"](.+?)['"]''', html)
        data_raw = downloader.read_html(url_api, page.url)
        data = json.loads(data_raw)
        for img in data['assets']['stillcut'].values():  # ordered in python3.7+
            img = urljoin(img_base, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    else:
        _imgs = re.findall('sImageUrl *: *[\'"](.+?)[\'"]', html)
        if not _imgs:
            raise Exception('no imgs')
        for img in _imgs:
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)

    return imgs
def get_imgs(page, session, cw):
    print_ = get_print(cw)

    if not downloader.cookiejar.get(
            'PROF', domain='.daum.net') and page.serviceType != 'free':  #3314
        raise NotPaidError()

    html = downloader.read_html(page.url, session=session)
    header, id = get_id(page.url)
    t = int(time())
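    # 't' is sent as the timeStamp query param below, presumably a cache buster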
    soup = Soup(html)
    type_ = header_to_type(header)

    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(
        type_, id, t)
    data_raw = downloader.read_html(url_data,
                                    session=session,
                                    referer=page.url)
    data = json.loads(data_raw)
    if header == 'league_':
        m_type = None
    else:
        m_type = data['data']['webtoonEpisode']['multiType']
    print_('m_type: {}'.format(m_type))

    if m_type == 'chatting':
        page.url = page.url.replace('daum.net/', 'daum.net/m/')
        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(
            type_, id, t)
        data_raw = downloader.read_html(url_data,
                                        session=session,
                                        referer=page.url)
        data = json.loads(data_raw)
        imgs = []
        for chat in data['data']['webtoonEpisodeChattings']:
            img = chat.get('image')
            if not img:
                continue
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)
    else:
        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(
            type_, id, t)
        data_raw = downloader.read_html(url_data,
                                        session=session,
                                        referer=page.url)
        data = json.loads(data_raw)
        if not data.get('data'):
            raise NotPaidError()
        imgs = []
        for img in data['data']:
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)

    return imgs
def check_error(soup, cw, wait):
    print_ = get_print(cw)
    
    err = soup.find('div', class_='error-container')
    if err:
        err = err.text.strip()
        if wait:
            print_('err: {}'.format(err))
            sleep(60*30, cw)  # back off 30 minutes
        else:
            raise Exception(err)
def print_streams(streams, cw):
    print_ = get_print(cw)

    for stream in streams:
        format = stream.format
        print_(u'[{}][{}fps][{}] {} {} ─ {}'.format(stream.resolution,
                                                    stream.fps, stream.abr,
                                                    stream.subtype,
                                                    stream.audio_codec,
                                                    format))
    print_('')
def real_url(url, session=None, cw=None):
    print_ = get_print(cw)
    if session is None:
        session = Session()
    data = clf2.solve(url, session=session, cw=cw)
    url_new = data['url']
    print('url_new:', url_new)
    if url_new != url:
        url_new = urljoin(url_new, '/' + u'/'.join(url.split('/')[3:]))  # keep the original path on the new domain
        print_(u'[redirect domain] {} -> {}'.format(url, url_new))
    return url_new
def get_title(soup, cw=None):
    print_ = get_print(cw)
    for h1 in soup.findAll('h1'):
        title = h1.text.strip()
        if title:
            break
    else:
        raise Exception('no title')
    title_clean = clean_title(title)
    print_('get_title: "{}"({}) "{}"({})'.format(title, title.encode('utf8'), title_clean, title_clean.encode('utf8')))
    return title_clean
    def read(self):
        ui_setting = self.ui_setting
        cw = self.customWidget
        print_ = get_print(cw)
        if self.yt_type == 'video':
            res = get_resolution()
            info = get_videos(self.url,
                              type=self.yt_type,
                              max_res=res,
                              only_mp4=False,
                              audio_included=False,
                              cw=cw)
        else:
            abr = get_abr()
            info = get_videos(self.url, type=self.yt_type, max_abr=abr, cw=cw)
        videos = info['videos']

        cw.enableSegment(overwrite=True)

        # first video must be valid
        while videos:
            video = videos[0]
            try:
                video.url()
                break
            except Exception as e:
                print(e)
                videos.remove(video)
        else:
            raise Exception('No videos')

        if len(videos) > 1:
            p2f = get_p2f(cw)
            if p2f:
                self.single = False
                self.title = clean_title(info['title'])
                self.urls = [video.url for video in videos]
                video = videos[0]
                self.setIcon(video.thumb)
                return
            else:
                video = videos.pop(0)
                cw.gal_num = cw.url = video.url._url
                if videos and cw.alive:
                    s = u', '.join(video.url._url for video in videos)
                    self.exec_queue.put(([s, {
                        'youtube': cw.format
                    }], 'downButton(cw[0], format_selector=cw[1])'))

        self.urls.append(video.url)
        self.artist = video.username
        self.setIcon(video.thumb)

        self.title = video.title
def get_imgs_from_tweet(tweet, session, types, format, cw=None):
    print_ = get_print(cw)
    id = tweet['id_str']

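    # Normalize: guarantee extended_entities exists so twitpic finds below
    # can be appended as regular media entries.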
    if 'extended_entities' not in tweet:
        tweet['extended_entities'] = {'media': []}

    for url_ in tweet['entities'].get('urls', []):
        url_ = url_['expanded_url']
        if '//twitpic.com/' in url_:
            print_('twitpic: {}'.format(url_))
            try:
                url_ = get_twitpic(url_, session)
                tweet['extended_entities']['media'].append({
                    'type': 'photo',
                    'media_url': url_,
                    'expanded_url': 'https://twitter.com'
                })
            except Exception as e:
                print_('Invalid twitpic')
                print_(print_error(e)[-1])

    media = tweet['extended_entities']['media']

    time = get_time(tweet)

    imgs = []
    for m in media:
        type_ = m['type']
        if type_ == 'photo':
            type_ = 'img'
        elif type_ == 'animated_gif':
            type_ = 'video'
        if type_ not in types:
            continue
        if type_ == 'video':
            url_media = sorted(m['video_info']['variants'],
                               key=lambda x: x.get('bitrate', 0))[-1]['url']
        elif type_ == 'img':
            url_media = m['media_url']
            if ':' not in os.path.basename(url_media):
                url_media += ':orig'  # request the original-size image
        else:
            raise NotImplementedError('unknown type')
        url = m['expanded_url']
        img = Image(url_media, url, id, time, len(imgs), format, cw,
                    type_ == 'video')
        imgs.append(img)

    return imgs
def fix_soup(soup, url, session=None, cw=None):
    '''
    Re-solve the page with clf2 when the expected layout is missing.
    '''
    print_ = get_print(cw)
    if soup.find('div', class_='logo'):
        return soup
    print_('invalid soup: {}'.format(url))

    res = clf2.solve(url, session=session, cw=cw)

    return Soup(res['html'])
def get_video(session, url, format, cw=None):
    print_ = get_print(cw)

    id = get_id(url)
    if 'live.nico' in url:  #3986
        raise NotImplementedError('nama')
        #info = nndownload.request_nama(session, id)
    else:
        info = nndownload.request_video(session, id)
    video = Video(session, info, format, cw)

    return video
    def __init__(self,
                 url,
                 format=u'[%y-%m-%d] id_ppage',
                 session=None,
                 cw=None,
                 media=None):
        print('Node', url)
        print_ = get_print(cw)
        self.id = re.search(FORMAT_PIN, url).groups()[0]
        self.imgs = []
        self.session = session

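        # Fetch the post via Instagram's GraphQL persisted query (the hash
        # below identifies it), keyed by the shortcode parsed from the URL.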
        if not media:
            if False:  # Original
                j = get_sd(url, self.session, cw=cw)
                data = j['entry_data']['PostPage'][0]['graphql']
            else:
                variables = {
                    "shortcode": self.id,
                    "child_comment_count": 3,
                    "fetch_comment_count": 40,
                    "parent_comment_count": 24,
                    "has_threaded_comments": True,
                }
                j = get_query('a9441f24ac73000fa17fe6e6da11d59d', variables,
                              session, cw)
                data = j['data']
            media = data['shortcode_media']

        if 'video_url' in media:
            urls = [media['video_url']]
        elif 'edge_sidecar_to_children' in media:
            edges = media['edge_sidecar_to_children']['edges']
            urls = []
            for edge in edges:
                node = edge['node']
                if 'video_url' in node:
                    url_ = node['video_url']
                else:
                    url_ = node['display_resources'][-1]['src']
                urls.append(url_)
        else:
            urls = [media['display_resources'][-1]['src']]
        time = media['taken_at_timestamp']

        self.date = datetime.fromtimestamp(time)
        self.timeStamp = self.date.strftime(format).replace(':', u'\uff1a')
        for p, img in enumerate(urls):
            ext = os.path.splitext(img.split('?')[0].split('#')[0])[1]
            filename = '{}{}'.format(self.timeStamp, ext).replace(
                'id', str(self.id)).replace('page', str(p))
            img = Image(img, url, filename)
            self.imgs.append(img)
def get_imgs_channel(url, html=None, cw=None):
    print_ = get_print(cw)
    if html is None:
        html = downloader.read_html(url)
    info = get_info(url, html)

    # Range
    max_pid = get_max_range(cw)

    ids = set()
    imgs = []
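    # Cursor-style pagination: each request passes the last item's id as
    # 'since' to fetch the next batch.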
    for p in range(1000):
        url_api = 'https://bcy.net/apiv3/user/selfPosts?uid={}'.format(
            info['uid'])
        if imgs:
            url_api += '&since={}'.format(imgs[-1].id)
        data_raw = downloader.read_html(url_api, url)
        data = json.loads(data_raw)['data']
        items = data['items']
        if not items:
            print('no items')
            break
        c = 0
        for item in items:
            check_alive(cw)
            id = item['item_detail']['item_id']
            if id in ids:
                print('duplicate')
                continue
            c += 1
            ids.add(id)
            url_single = u'https://bcy.net/item/detail/{}'.format(id)
            imgs_single = get_imgs(url_single, cw=cw)
            print_(str(id))
            for p, img in enumerate(imgs_single):
                img = Image(img._url, url_single, id, p)
                imgs.append(img)
            s = u'{} {} - {}'.format(tr_(u'읽는 중...'), info['artist'],
                                     min(len(imgs), max_pid))
            if cw:
                cw.setTitle(s)
            else:
                print(s)

            if len(imgs) >= max_pid:
                break
        if not c:
            print('not c')
            break
        if len(imgs) >= max_pid:
            print('over max_pid:', max_pid)
            break
    return imgs[:max_pid]
def get_video(url, session, cw, ie_key=None):
    print_ = get_print(cw)
    try:
        video = _get_video(url, session, cw, ie_key, allow_m3u8=True)
        if isinstance(video.url(), M3u8_stream):
            c = video.url().segs[0].download(cw)
            if not c:
                raise Exception('invalid m3u8')
        return video
    except Exception as e:
        print_(e)
        return _get_video(url, session, cw, ie_key, allow_m3u8=False)
    def get(self, _):
        print_ = get_print(self.cw)
        url = self._url
        ext = get_ext(url)
        if ext.lower() == '.gif':
            print_('get_ext: {}, {}'.format(self.id_, url))
            try:
                ext = downloader.get_ext(url)
            except Exception as e:  #3235
                print_('Err: {}, {}\n'.format(self.id_, url) + print_error(e)[0])
        self.filename = '{}_p{}{}'.format(self.id_, self.p, ext)
        return url
def get_imgs(url, title=None, cw=None):
    print_ = get_print(cw)
    url = clean_url(url)

    id = get_id(url)
    url = u'https://nijie.info/members_illust.php?id={}'.format(id)

    # Range
    max_pid = get_max_range(cw)

    imgs = []
    url_imgs = set()
    for p in range(1, 1 + 100):
        url = setPage(url, p)
        print_(url)
        html = downloader.read_html(url)

        soup = Soup(html)
        posts = soup.findAll('div', class_='nijie')
        if not posts:
            print('no posts')
            break
        c = 0
        for post in posts:
            url_img = urljoin(url, post.a.attrs['href'])
            if url_img in url_imgs:
                print('duplicate:', url_img)
                continue
            url_imgs.add(url_img)
            id = int(re.find('[?&]id=([0-9]+)', url_img, err='no id'))
            multi = post.find('div', class_='thumbnail-icon')
            if multi:
                imgs_ = get_imgs_post(id, url_img)  # multi-image post
            else:
                imgs_ = [Image(id, url_img, 0)]

            imgs += imgs_
            c += 1

            if len(imgs) >= max_pid:
                break

            msg = u'{}  {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
            if cw:
                if not cw.alive:
                    return imgs  # widget closed; return what we have
                cw.setTitle(msg)
            else:
                print(msg)

        if len(imgs) >= max_pid or c == 0:
            break
    return imgs
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    ids = set()
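    # /{header}/{username}/videos/best/{p} returns JSON pages; stop when a
    # page is empty or all nb_videos have been seen.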
    for p in range(100):
        url_api = urljoin(url_page,
                          '/{}/{}/videos/best/{}'.format(header, username, p))
        print_(url_api)
        r = session.post(url_api)
        data = json.loads(r.text)

        videos = data.get('videos')  #4530
        if not videos:
            print_('empty')
            break

        for video in videos:
            id_ = video['id']
            if id_ in ids:
                print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            info['name'] = video['pn']
            urls.append(urljoin(url_page, video['u']))

        if len(urls) >= max_pid:
            break

        n = data['nb_videos']

        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(ids) >= n:
            break
        sleep(1, cw)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
    def get(self, _):
        print_ = get_print(self.cw)
        url = self._url
        ext = get_ext(url)
        if ext.lower()[1:] not in ['jpg', 'png', 'mp4']:  #4645
            print_('get_ext: {}, {}'.format(self.id_, url))
            try:
                ext = downloader.get_ext(url, referer=_)
            except Exception as e:  #3235
                print_('Err: {}, {}\n'.format(self.id_, url) +
                       print_error(e)[0])
        self.filename = '{}_p{}{}'.format(self.id_, self.p, ext)
        return url
    def get(self, url):
        print_ = get_print(self.cw)
        if self._url:
            return self._url

        info = self.info

        ##        ydl = ytdl.YoutubeDL()
        ##        info = ydl.extract_info(url)

        formats = info['formats']
        print(formats)
        formats = sorted(formats,
                         key=lambda x: int(x.get('abr') or 0),
                         reverse=True)
        url_audio = None
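        # Prefer a direct progressive (http/https) URL; otherwise fall back
        # to assembling the audio from the HLS stream.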

        for format in formats:
            protocol = format['protocol']
            print_(u'【{}】 format【{}】 abr【{}】'.format(protocol,
                                                     format['format'],
                                                     format.get('abr', 0)))
            if not url_audio and protocol in ['http', 'https']:
                url_audio = format['url']

        if not url_audio:
            url_audio = M3u8_stream(formats[0]['url'])
            self.album_art = False  #

        self.username = info['uploader']
        self.title = u'{} - {}'.format(self.username, info['title'])
        self.filename = u'{}{}'.format(
            clean_title(self.title, allow_dot=True, n=-4), '.mp3')

        thumb = None
        for t in info['thumbnails'][::-1]:
            width = t.get('width', 1080)
            if not 100 <= width <= 500:
                continue
            url_thumb = t['url']
            thumb = BytesIO()
            try:
                downloader.download(url_thumb, buffer=thumb)
                break
            except Exception as e:
                print(e)
                thumb = None
        self.thumb = thumb

        self._url = url_audio
        return self._url