Example No. 1
    def __get_cookie(self) -> Session:
        session = requests.Session()
        # read the stored login cookies once instead of building a throwaway Session for each lookup
        cookies = Session().cookies
        user_key = cookies.get("USERKEY", domain=".novelpia.com")
        login_key = cookies.get("LOGINKEY", domain=".novelpia.com")

        if user_key and login_key:
            session.cookies.set("USERKEY", user_key, domain=".novelpia.com")
            session.cookies.set("LOGINKEY", login_key, domain=".novelpia.com")
        return session
    @property  # accessed elsewhere as a plain attribute (self.soup), so presumably a cached property
    def soup(self):
        if self._soup is None:
            self.session = Session()
            self._soup = get_soup(self.url,
                                  session=self.session,
                                  cw=self.customWidget)
        return self._soup
    def init(self):
        self.url = clean_url(self.url)
        self.session = Session()
        if re.search(PATTERN_ID, self.url):  #1799
            select = self.soup.find('select', class_='bookselect')
            for i, op in enumerate(select.findAll('option')[::-1]):
                if 'selected' in op.attrs:
                    break
            else:
                raise Exception('no selected option')
            for a in self.soup.findAll('a'):
                url = urljoin(self.url, a.get('href') or '')
                if re.search(PATTERN, url):
                    break
            else:
                raise Exception('list not found')
            self.url = self.fix_url(url)
            self._soup = None

            for i, page in enumerate(
                    get_pages(self.url, self.session, self.soup)):
                if page.id == int(op['value']):
                    break
            else:
                raise Exception('can not find page')
            self.cw.range_p = [i]
Example No. 4
def enter():
    print('enter')
    session = Session()

    r = session.get(URL_ENTER)

    # 862
    html = r.text
    soup = Soup(html)
    box = soup.find('aside', id='FilterBox')
    data = {}
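    # collect the filter form's defaults: the last option of every <select>, plus the rating_* and CSRF_TOKEN inputs; fixed media/order/type values are applied below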
    for select in box.findAll('select'):
        name = select.attrs['name']
        value = select.findAll('option')[-1].attrs['value']
        print(name, value)
        data[name] = value
    for input in box.findAll('input'):
        name = input.attrs['name']
        value = input.attrs['value']
        if name.startswith('rating_') or 'CSRF_TOKEN' in name:
            print(name, value)
            data[name] = value
    data.update({
        'filter_media': 'A',
        'filter_order': 'date_new',
        'filter_type': '0',
    })
    r = session.post(URL_FILTER, data=data, headers={'Referer': r.url})
    print(r)

    return session
def get_id(url, cw=None):
    print_ = get_print(cw)

    url = url.split('?')[0].split('#')[0]

    if '/artwork/' in url:
        id_art = get_id_art(url)
        imgs = get_imgs_page(id_art, session=Session(), cw=cw)
        return imgs[0].data['user']['username']

    if '.artstation.' in url and 'www.artstation.' not in url:
        id = url.split('.artstation')[0].split('//')[-1]
        type = None
    elif 'artstation.com' in url:
        paths = url.split('artstation.com/')[1].split('/')
        id = paths[0]
        type = paths[1] if len(paths) > 1 else None
    else:
        id = url.replace('artstation_', '')
        type = None

    if type not in [None, 'likes']:
        type = None

    print_('type: {}, id: {}'.format(type, id))

    if type:
        return '{}/{}'.format(id, type)
    return id
Example No. 6
    def init(self):
        self.url = self.url.replace('lhscan.net', 'loveheaven.net')
        self.session = Session()
        #clf2.solve(self.url, session=self.session, cw=self.cw)
        soup = self.soup
        if not soup.find('ul', class_='manga-info'):
            self.Invalid(u'{}: {}'.format(tr_(u'목록 주소를 입력해주세요'), self.url))  # 'Please enter the list URL'
def get_session():
    session = Session()
    session.cookies.set(name='over18',
                        value='yes',
                        path='/',
                        domain='.syosetu.com')
    return session
Example No. 8
def get_soup(url, session=None):
    if session is None:
        session = Session()
    res = clf2.solve(url, session=session)
    soup = Soup(res['html'], apply_css=True)

    return session, soup, res['url']
    def init(self):
        if u'bdsmlr.com/post/' in self.url:
            raise errors.Invalid(
                tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url))

        self.url = 'https://{}.bdsmlr.com'.format(self.id_)
        self.session = Session()
        clf2.solve(self.url, session=self.session, cw=self.cw)
Example No. 10
def get_soup_session(url, cw=None):
    print_ = get_print(cw)
    session = Session()
    res = clf2.solve(url, session=session, cw=cw)
    print_('{} -> {}'.format(url, res['url']))
    if res['url'].rstrip('/') == 'https://welovemanga.one':
        raise errors.LoginRequired()
    return Soup(res['html']), session
Example No. 11
    def read(self):
        session = Session()
        video = get_video(self.url, session, self.cw)
        self.urls.append(video.url)

        self.setIcon(video.thumb)

        self.title = video.title
Example No. 12
def get_videos(url, cw=None):
    print_ = get_print(cw)
    info = {}
    user_id = re.find(r'twitch.tv/([^/?]+)', url, err='no user_id')
    print(user_id)
    session = Session()
    r = session.get(url)
    s = cut_pair(re.find(r'headers *: *({.*)', r.text, err='no headers'))
    print(s)
    headers = json_loads(s)

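    # Twitch GQL persisted query 'ClipsCards__User': fetches the user's clips 20 at a time; the loop below pages with the cursor returned on each edge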
    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {
            'login': user_id,
            'limit': 20,
            'criteria': {
                'filter': 'ALL_TIME'
            }
        },
        'extensions': {
            'persistedQuery': {
                'version':
                1,
                'sha256Hash':
                'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
            }
        },
    }]
    videos = []
    cursor = None
    cursor_new = None
    while True:
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        r = session.post('https://gql.twitch.tv/gql',
                         json=payload,
                         headers=headers)
        #print(r)
        data = r.json()
        for edge in data[0]['data']['user']['clips']['edges']:
            url_video = edge['node']['url']
            info['name'] = edge['node']['broadcaster']['displayName']
            video = Video(url_video)
            video.id = int(edge['node']['id'])
            videos.append(video)
            cursor_new = edge['cursor']
        print_('videos: {} / cursor: {}'.format(len(videos), cursor))
        if cursor == cursor_new:
            print_('same cursor')
            break
        if cursor_new is None:
            break
        cursor = cursor_new
    if not videos:
        raise Exception('no videos')
    info['videos'] = sorted(videos, key=lambda video: video.id, reverse=True)
    return info
Example No. 13
    def init(self):
        self.url = self.url.replace('bdsmlr_', '')

        if u'bdsmlr.com/post/' in self.url:
            return self.Invalid(tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url), fail=False)  # 'Individual downloads are not supported'

        self.url = 'https://{}.bdsmlr.com'.format(self.id_)
        self.session = Session()
        clf2.solve(self.url, session=self.session, cw=self.customWidget)
    def init(self):
        self.url_main = 'https://www.artstation.com/{}'.format(self.id.replace('artstation_', '', 1))

        if '/artwork/' in self.url:
            pass  # raise NotImplementedError('Single post')
        else:
            self.url = self.url_main

        self.session = Session()
Example No. 15
    async def get_current_user(self) -> Session:
        """
        Instantiate the session and load the user's session data.
        Override this method if you are not using the built-in session implementation.
        """
        if not self.get_secure_cookie('session_id'):
            self.set_secure_cookie('session_id', uuid4().hex)
        self.Session = Session.Session(self)
        await self.Session.get_data()
        return self.Session.data
Example No. 16
    def read(self):
        format = compatstr(
            self.ui_setting.youtubeFormat.currentText()).lower().strip()
        session = Session()
        video = get_video(self.url, session, format)
        self.urls.append(video.url)

        self.setIcon(video.thumb)

        self.title = video.title
Example No. 17
def real_url(url, session=None, cw=None):
    print_ = get_print(cw)
    if session is None:
        session = Session()
    data = clf2.solve(url, session=session, cw=cw)
    url_new = data['url']
    print('url_new:', url_new)
    if url_new != url:
        url_new = urljoin(url_new, '/' + u'/'.join(url.split('/')[3:]))  #
        print_(u'[redirect domain] {} -> {}'.format(url, url_new))
    return url_new
Example No. 18
def get_video(url, session=None):
    if session is None:
        session = Session()
        session.headers['User-Agent'] = downloader.hdr['User-Agent']
    session.headers['X-Directive'] = 'api'
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
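    # locate the script that assigns window.__NUXT__ and parse its payload as JSON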
    for script in soup.findAll('script'):
        script = script.text or script.string or ''
        data = re.find('window.__NUXT__=(.+)', script)
        if data is not None:
            data = data.strip()
            if data.endswith(';'):
                data = data[:-1]
            data = json.loads(data)
            break
    else:
        raise Exception('No __NUXT__')

    info = data['state']['data']['video']['hentai_video']
    query = info['slug']
    #url_api = 'https://members.hanime.tv/api/v3/videos_manifests/{}?'.format(query) # old
    url_api = 'https://hanime.tv/rapi/v7/videos_manifests/{}?'.format(
        query)  # new
    print(url_api)
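    # headers sent with the manifest request: a random 32-hex-digit x-signature, the signature version, and the current unix time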
    hdr = {
        'x-signature':
        ''.join('{:x}'.format(randrange(16)) for i in range(32)),
        'x-signature-version': 'web2',
        'x-time': str(int(time())),
    }
    r = session.get(url_api, headers=hdr)
    print(r)
    data = json.loads(r.text)
    streams = []
    for server in data['videos_manifest']['servers']:
        streams += server['streams']

    streams_good = []
    for stream in streams:
        url_video = stream['url']
        if not url_video or 'deprecated.' in url_video:
            continue
        streams_good.append(stream)

    if not streams_good:
        raise Exception('No video available')
    print('len(streams_good):', len(streams_good))
    for stream in streams_good:
        print(stream['extension'], stream['width'], stream['filesize_mbs'],
              stream['url'])

    stream = streams_good[0]
    return Video(info, stream), session
Example No. 19
    def init(self):
        cw = self.cw
        self.session = Session()
        res = clf2.solve(self.url, self.session, cw)
        soup = Soup(res['html'])
        if is_captcha(soup):

            def f(html):
                return not is_captcha(Soup(html))

            clf2.solve(self.url, self.session, cw, show=True, f=f)
Example No. 20
    def set_session(self, user):
        sess_id = str(uuid4())
        current_session = Session(username=user,
                                  sess_id=sess_id,
                                  created=datetime.now())
        session_cookie = Cookie.SimpleCookie()
        session_cookie['session_id'] = sess_id
        session_cookie['session_id']['path'] = "/"
        # register the session and send it back to the client as a cookie
        self.sessions[sess_id] = current_session
        self.default_header.append(
            ('Set-Cookie', session_cookie.output(header='').strip()))
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    ids = set()
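    # page through the '/{header}/{username}/videos/best/{page}' JSON endpoint, skipping duplicate ids, until a page is empty, max_pid urls are collected, or all nb_videos have been seen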
    for p in range(100):
        url_api = urljoin(url_page,
                          '/{}/{}/videos/best/{}'.format(header, username, p))
        print_(url_api)
        r = session.post(url_api)
        data = json.loads(r.text)

        videos = data.get('videos')  #4530
        if not videos:
            print_('empty')
            break

        for video in videos:
            id_ = video['id']
            if id_ in ids:
                print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            info['name'] = video['pn']
            urls.append(urljoin(url_page, video['u']))

        if len(urls) >= max_pid:
            break

        n = data['nb_videos']

        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(ids) >= n:
            break
        sleep(1, cw)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
Example No. 22
    def get_session(self):
        try:
            session_cookie = Cookie.SimpleCookie(
                self.environ.get('HTTP_COOKIE', ""))
            session_morsel = session_cookie.get('session_id', Cookie.Morsel())
            current_session = self.sessions.get(
                session_morsel.value, Session(created=datetime(1970, 1, 1)))
            # expire sessions older than two hours, then report whether a live one exists
            if current_session.created < (datetime.now() - timedelta(hours=2)):
                self.sessions.pop(current_session.sess_id, None)
            return bool(self.sessions.get(session_morsel.value, False))
        except:
            sys.stderr.write(traceback.format_exc())
            return False
def get_session(url, cw=None):
    #res = clf2.solve(url, cw=cw)
    #return res['session']
    session = Session()
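    # reuse the instagram.com cookies already stored in Session(); bail out if the saved sessionid is missing or expired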
    sessionid = session.cookies._cookies.get('.instagram.com', {}).get('/',{}).get('sessionid')
    if sessionid is None or sessionid.is_expired():
        raise errors.LoginRequired()
    session.headers['User-Agent'] = downloader.hdr['User-Agent']
    if not session.cookies.get('csrftoken', domain='.instagram.com'):
        csrf_token = generate_csrf_token()
        print('csrf:', csrf_token)
        session.cookies.set("csrftoken", csrf_token, domain='.instagram.com')
    return session
    def init(self):
        self.session = Session()  # 1791
        if 'pornhub_gif_' in self.url:
            self.url = 'https://www.pornhub.com/gif/{}'.format(
                self.url.replace('pornhub_gif_', ''))
        elif 'pornhub_album_' in self.url:
            self.url = 'https://www.pornhub.com/album/{}'.format(
                self.url.replace('pornhub_album_', ''))
        elif 'pornhub_' in self.url:
            self.url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
                       .format(self.url.replace('pornhub_', ''))
        if 'pornhubpremium.com' in self.url.lower() and\
           not is_login(self.session, self.cw):
            return self.Invalid('[Pornhub] Login cookies required')
Example No. 25
    def read(self):
        self.session = Session()
        self.session.cookies.set('_ac', '1', domain='.video.fc2.com')
        info = get_info(self.url, self.session, self.cw)

        video = info['videos'][0]

        self.urls.append(video.url)

        f = BytesIO()
        downloader.download(video.url_thumb, referer=self.url, buffer=f)
        self.setIcon(f)

        self.title = info['title']
    def get(self, url):
        if self._url_video:
            return self._url_video
        cw = self.cw
        print_ = get_print(cw)
        html = downloader.read_html(url)
        soup = Soup(html)

        embedUrl = extract('embedUrl', html, cw)
        if embedUrl:
            raise EmbedUrlError('[pandoratv] EmbedUrl: {}'.format(embedUrl))
        
        uid = extract('strLocalChUserId', html, cw)
        pid = extract('nLocalPrgId', html, cw)
        fid = extract('strFid', html, cw)
        resolType = extract('strResolType', html, cw)
        resolArr = extract('strResolArr', html, cw)
        vodSvr = extract('nVodSvr', html, cw)
        resols = extract('nInfo', html, cw)
        runtime = extract('runtime', html, cw)

        url_api = 'http://www.pandora.tv/external/getExternalApi/getVodUrl/'
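        # POST body for the getVodUrl API, assembled from the values extracted from the page source above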
        data = {
            'userId': uid,
            'prgId': pid,
            'fid': fid,
            'resolType': resolType,
            'resolArr': ','.join(map(str, resolArr)),
            'vodSvr': vodSvr,
            'resol': max(resols),
            'runtime': runtime,
            'tvbox': 'false',
            'defResol': 'true',
            'embed': 'false',
            }
        session = Session()
        r = session.post(url_api, headers={'Referer': url}, data=data)
        data = json.loads(r.text)
        self._url_video = data['src']

        self.title = soup.find('meta', {'property': 'og:description'})['content']
        
        ext = get_ext(self._url_video)
        self.filename = format_filename(self.title, pid, ext)

        self.url_thumb = soup.find('meta', {'property': 'og:image'})['content']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)
        
        return self._url_video
Example No. 27
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw, 2000)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    urls_set = set()
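    # walk the channel's '/videos/best/{page}' listing and collect unique video links from the 'thumb-block' divs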
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print(url_api)
        r = session.post(url_api, data='main_cats=false')
        soup = Soup(r.text)
        thumbs = soup.findAll('div', class_='thumb-block')
        if not thumbs:
            print_('empty')
            break
        for thumb in thumbs:
            info['name'] = thumb.find('span', class_='name').text.strip()
            href = thumb.find('a')['href']
            href = urljoin(url_page, href)
            if href in urls_set:
                print_('duplicate: {}'.format(href))
                continue
            urls_set.add(href)
            urls.append(href)
        
        if len(urls) >= max_pid:
            break
        
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(s)
        else:
            print(s)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
        
Example No. 28
def get_soup(url, session=None):
    if session is None:
        session = Session()

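    # clf2 callback: if the page still shows the fcaptcha form, reveal the browser so the user can solve it; hide it again once the page is clean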
    def f(html, browser=None):
        soup = Soup(html)
        if soup.find('form', {'name': 'fcaptcha'}):  #4660
            browser.show()
            return False
        browser.hide()
        return True

    res = clf2.solve(url, session=session, f=f)
    soup = Soup(res['html'], apply_css=True)

    return session, soup, res['url']
    def init(self):
        type = self.url.split('sankakucomplex.com')[0].split('//')[-1].strip('.').split('.')[-1]
        if type == '':
            type = 'www'
        if type not in ['chan', 'idol', 'www']:
            raise Exception('Not supported subdomain')
        self.type_sankaku = type
        self.url = self.url.replace('&commit=Search', '')
        self.url = clean_url(self.url)
        self.session = Session()

        if self.type_sankaku != 'www':
            login(type, self.session, self.cw)

        if self.type_sankaku == 'www':
            html = downloader.read_html(self.url, session=self.session)
            self.soup = Soup(html)
Example No. 30
    def init(self):
        self.url = self.url.replace('sankaku_', '')
        if '/post/' in self.url:
            return self.Invalid('Single post is not supported')

        if 'sankakucomplex.com' in self.url:
            self.url = self.url.replace('http://', 'https://')
            type = self.url.split('sankakucomplex.com')[0].split(
                '//')[-1].strip('.').split('.')[-1]
            if type == '':
                type = 'www'
            if type not in ['chan', 'idol', 'www']:
                raise Exception('Not supported subdomain')
        else:
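            # plain tag query: normalize spaces to '+' and choose the subdomain from a leading [chan]/[idol]/[www] tag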
            url = self.url
            url = url.replace(' ', '+')
            while '++' in url:
                url = url.replace('++', '+')
            url = urllib.quote(url)
            url = url.replace('%2B', '+')
            url = url.replace('%20', '+')  #
            if url.startswith('[chan]'):
                type = 'chan'
                url = url.replace('[chan]', '', 1).strip()
            elif url.startswith('[idol]'):
                type = 'idol'
                url = url.replace('[idol]', '', 1).strip()
            elif url.startswith('[www]'):
                type = 'www'
                url = url.replace('[www]', '', 1).strip()
            else:
                raise Exception('Not supported subdomain')
            self.url = u'https://{}.sankakucomplex.com/?tags={}'.format(
                type, url)
        self.type_sankaku = type
        self.url = self.url.replace('&commit=Search', '')
        self.url = clean_url(self.url)
        self.session = Session()

        if self.type_sankaku != 'www':
            login(type, self.session, self.customWidget)

        if self.type_sankaku == 'www':
            html = downloader.read_html(self.url, session=self.session)
            self.soup = Soup(html)