Example #1
def login(se, proxy: dict, uid: str, pw: str) -> bool:
    """
    Login and set cookies for exhentai.
    Exceptions:
        exception.ValidationError: Raised when the username/password is wrong or the account has no permission to access exhentai.
        exception.ResponseError: Raised when the server sends an abnormal response (including AttributeError).
    """
    try:
        with se.post(_LOGIN_URL,
                     params={
                         'act': 'Login',
                         'CODE': '01'
                     },
                     data={
                         'CookieDate': '1',
                         'UserName': uid,
                         'PassWord': pw
                     },
                     headers={'User-Agent': misc.USER_AGENT},
                     proxies=proxy,
                     timeout=5) as login_res:
            login_html = BeautifulSoup(login_res.text, 'lxml')
            se.cookies.update(login_res.cookies)  # Set cookies

        if login_html.head.title.string == 'Please stand by...':
            with se.get(_EXHENTAI_URL,
                        proxies=proxy,
                        headers={'User-Agent': misc.USER_AGENT},
                        timeout=5) as ex_res:
                ex_html = BeautifulSoup(ex_res.text, 'lxml')
                if ex_html.head.title.string == 'ExHentai.org':
                    se.cookies.update(
                        ex_res.cookies)  # Set cookies for exhentai
                    return True
                else:
                    raise exception.ValidationError(
                        'Login: Cannot get into exhentai.')
        elif login_html.head.title.string == 'Log In':
            raise exception.ValidationError(
                'Login: Incorrect username or password.')
        else:
            raise exception.ResponseError('Login: Abnormal response.')

    except requests.Timeout:
        raise requests.Timeout('Login: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Login: ' + repr(e))
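
The snippets in this collection reference names defined elsewhere in their projects: URL constants such as _LOGIN_URL and _EXHENTAI_URL, a misc module (USER_AGENT, name_verify), an exception module with the custom error classes, and a _ban_checker helper. A minimal driver sketch for login, assuming the snippets live in a hypothetical importable module named crawler:

import requests

import crawler    # Hypothetical module holding the snippets above
import exception  # Project module with the custom error classes (assumed)

se = requests.Session()
proxy = {'https': 'http://127.0.0.1:1080'}  # Or {} for a direct connection

try:
    if crawler.login(se, proxy, 'username', 'password'):
        print('Logged in; exhentai cookies are now set on the session.')
except (exception.ValidationError, exception.ResponseError) as e:
    print('Login failed:', e)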
Example #2
def information(se, proxy: dict, addr: str) -> dict:
    """
    Fetch gallery information, including misc info and the thumbnail.
    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        addr: Gallery address.
    Exceptions:
        exception.ResponseError: Raised when the server sends an abnormal response.
    """
    re_thumb = re.compile(r'.*url\((.*)\).*')
    try:
        with se.get(addr,
                    params={'inline_set': 'ts_m'},
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    timeout=5) as gallery_res:
            gallery_html = BeautifulSoup(gallery_res.text, 'lxml')
        _ban_checker(gallery_html)
        body_text = gallery_html.body.get_text()
        if 'Gallery not found.' in body_text or 'Key missing' in body_text:
            raise exception.WrongAddressError('Wrong address provided.')
        name: str = gallery_html.find('h1',
                                      id='gj').string  # Japanese name takes priority
        if not name:
            name = gallery_html.find('h1', id='gn').string
        info = gallery_html.find_all('td', class_='gdt2')
        thumb = re_thumb.match(
            gallery_html.find('div', id='gd1').div['style']).group(1)
        if name and info and thumb:
            return {
                'addr': addr,
                'name': name,
                'size': info[4].string,
                'page': info[5].string[:-6],  # e.g. '123 pages' -> '123'
                'thumb': thumb
            }
        else:
            raise exception.ResponseError('Information: Abnormal response.')

    except requests.Timeout:
        raise requests.Timeout('Information: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Information: ' + repr(e))
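
A sketch of consuming the returned dict, reusing the session prepared above and a placeholder gallery address:

addr = 'https://exhentai.org/g/<gid>/<token>/'  # Placeholder address
info = crawler.information(se, proxy, addr)
print('{0}: {1} pages, {2}'.format(info['name'], info['page'], info['size']))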
Example #3
def fetch_keys(se, proxy: dict, info: dict) -> dict:
    """
    Fetch keys (imgkeys and showkey) from a gallery.
    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        info: Information of the gallery.
    Return:
        A dictionary mapping each page number (as a string) to its imgkey,
            with the showkey stored under key '0'.
    Exceptions:
        exception.ResponseError: Raised when the server sends an abnormal response.
    """
    re_imgkey = re.compile(r'https://exhentai\.org/s/(\w{10})/\d*-(\d{1,4})')
    re_showkey = re.compile(r'[\S\s]*showkey="(\w{11})"[\S\s]*')
    gid = info['addr'].split('/')[-3]
    pn = int(info['page']) // 40 + 1  # 40 thumbnails per page; +1 so range() is never empty
    keys = dict()
    try:
        for p in range(pn):
            with se.get(info['addr'],
                        params={
                            'inline_set': 'ts_m',
                            'p': p
                        },
                        headers={'User-Agent': misc.USER_AGENT},
                        proxies=proxy,
                        timeout=5) as gallery_res:
                gallery_html = BeautifulSoup(gallery_res.text, 'lxml')
            _ban_checker(gallery_html)

            # Fetch imgkey from every picture
            pics = gallery_html.find_all('div', class_='gdtm')
            for item in pics:
                match = re_imgkey.match(item.a['href'])
                keys[match.group(2)] = match.group(1)

        # Fetch showkey from first picture
        showkey_url = '/'.join(
            ['https://exhentai.org/s', keys['1'], gid + '-1'])
        with se.get(showkey_url,
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    timeout=5) as showkey_res:
            showkey_html = BeautifulSoup(showkey_res.text, 'lxml')
        _ban_checker(showkey_html)
        keys['0'] = re_showkey.match(showkey_html('script')[1].string).group(1)
        return keys

    except requests.Timeout:
        raise requests.Timeout('Fetch_keys: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Fetch_keys: ' + repr(e))
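
The returned mapping uses string page numbers for the imgkeys and reserves key '0' for the showkey. A sketch of the two-step flow that feeds information() into fetch_keys():

info = crawler.information(se, proxy, addr)  # addr as above
keys = crawler.fetch_keys(se, proxy, info)
# keys now looks like {'1': '<imgkey>', '2': '<imgkey>', ..., '0': '<showkey>'}
print('Collected imgkeys for', len(keys) - 1, 'pages')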
Example #4
def account_info(se, proxy: dict) -> tuple:
    """
    Get the download limit (used/total).
    Exceptions:
        exception.ResponseError: Raised when the server sends an abnormal response.
    """
    try:
        with se.get(_ACCOUNT_URL,
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    timeout=5) as info_res:
            info_html = BeautifulSoup(info_res.text, 'lxml')
        _ban_checker(info_html)
        info_node = info_html.find('div', class_='homebox')
        if info_node:
            limit = info_node('strong')
            return limit[0].string, limit[1].string
        else:
            raise exception.ResponseError('Account_info: Abnormal response.')
    except requests.Timeout:
        raise requests.Timeout('Account_info: Timeout.')
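
A sketch of checking the remaining image quota before starting a large download, under the same assumptions:

used, total = crawler.account_info(se, proxy)
print('Image limit used: {0} / {1}'.format(used, total))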
Example #5
def get_following(se, proxy: dict) -> dict:
    """Get the list of loginned user's following."""
    try:
        with se.get(_ROOT_URL + 'bookmark.php',
                    params={'type': 'user'},
                    headers={'User-Agent': random.choice(misc.USER_AGENT)},
                    proxies=proxy,
                    timeout=5) as fo_res:
            fo_html = BeautifulSoup(fo_res.text, 'lxml')
        fo_node = fo_html.find_all('div', class_='userdata')
        if not fo_node:
            raise exception.ResponseError('Cannot fetch following info.')

        fo_info = {
            ele.a['data-user_id']: ele.a['data-user_name']
            for ele in fo_node
        }
        return fo_info
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting following info.')
    except exception.ResponseError:
        raise
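
From this example on, the snippets target pixiv rather than exhentai; note that misc.USER_AGENT is a list here, sampled with random.choice. A sketch of listing followed users, assuming a session that already carries valid pixiv login cookies:

following = crawler.get_following(se, proxy)
for user_id, user_name in following.items():
    print(user_id, user_name)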
Example #6
def get_detail(se, pid: str, proxy: dict = None) -> dict:
    """
    Get detail of specified illustration.
    Args:
        se: Session instance.
        pid: An id of illustration.
        proxy: (optional) the proxy used.
    Return:
        A dict contains detail of the illustration.
    """
    re_thumb = re.compile(r'540x540_70')

    try:
        with se.get(_ILLUST_URL + pid,
                    headers={'User-Agent': random.choice(misc.USER_AGENT)},
                    proxies=proxy,
                    timeout=5) as item_detail:
            item_json = json.loads(item_detail.text)
        if item_json['error']:
            raise exception.ResponseError(item_json['message'] +
                                          '(illust detail)')

        item_json = item_json['body']
        create_date = item_json['createDate'].split('T')[0]
        return {
            'illustId': item_json['illustId'],
            'illustTitle': item_json['illustTitle'],
            'createDate': create_date,
            'url': item_json['urls']['original'],
            'thumb': re_thumb.sub('150x150', item_json['urls']['small']),
            'userId': item_json['userId'],
            'userName': item_json['userName'],
            'pageCount': item_json['pageCount']
        }
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting illust detail.')
    except exception.ResponseError:
        raise
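
A sketch of resolving a single illustration with a placeholder id:

detail = crawler.get_detail(se, '12345678', proxy)  # Placeholder pid
print('{0} by {1}, {2} page(s)'.format(detail['illustTitle'],
                                       detail['userName'],
                                       detail['pageCount']))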
Example #7
def get_user(se, proxy: dict) -> tuple:
    """Get username and pixiv id."""
    try:
        with se.get(_ROOT_URL,
                    proxies=proxy,
                    timeout=5,
                    headers={
                        'Referer': 'https://www.pixiv.net/',
                        'User-Agent': random.choice(misc.USER_AGENT)
                    }) as user_res:
            user_info = re.findall(
                r'"userData":{"id":"(\d{1,10})","pixivId":"(.*)","name":"(.*)","profileImg":',
                user_res.text)

        if not user_info:
            raise exception.ResponseError('Cannot fetch user info.')

        user_id = user_info[0][0]
        user_name = user_info[0][2]
        return user_id, user_name
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting user info.')
    except exception.ResponseError:
        raise
Example #8
def download(se,
             proxy: dict,
             info: dict,
             keys: dict,
             page: int,
             path: str,
             rename=False,
             rewrite=False):
    """
    Download one picture.
    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        info: Information of the gallery.
        keys: Keys include imgkeys and showkey.
        page: Page number.
        path: Save root path.
        rename: If True, name the file by its page number instead of the original name.
        rewrite: If True, overwrite an existing image instead of skipping it.
    Exceptions:
        exception.ResponseError: Raised when the server sends an abnormal response.
        exception.LimitationReachedError: Raised when the view limit is reached.
    """
    gid = info['addr'].split('/')[-3]
    try:
        with se.post(_EXHENTAI_URL + 'api.php',
                     json={
                         'method': 'showpage',
                         'gid': int(gid),
                         'page': int(page),
                         'imgkey': keys[str(page)],
                         'showkey': keys['0']
                     },
                     headers={'User-Agent': misc.USER_AGENT},
                     proxies=proxy,
                     timeout=5) as dl_res:  # Fetch original url of picture
            dl_json = dl_res.json()

        if dl_json.get('error'):  # Wrong imgkey or showkey
            raise exception.ResponseError('Download: ' + dl_json['error'])
        if dl_json.get('i3'):  # Check whether the view limit is reached
            url_html = BeautifulSoup(dl_json['i3'], 'lxml')
            if url_html.a.img['src'] == 'https://exhentai.org/img/509.gif':
                raise exception.LimitationReachedError(page)

        if dl_json.get('i7'):
            url_html = BeautifulSoup(dl_json['i7'], 'lxml')  # Origin image
            origin = url_html.a['href']
        elif dl_json.get('i3'):
            url_html = BeautifulSoup(dl_json['i3'],
                                     'lxml')  # Showing image is original
            origin = url_html.a.img['src']
        else:
            raise exception.ResponseError('Download: Missing expected elements.')

        folder_name = misc.name_verify(info['name'])
        folder_path = os.path.join(path, folder_name)
        try:  # Several threads may try to create the folder at once
            os.makedirs(folder_path)
            print('mkdir:', folder_path)
        except FileExistsError:
            pass
        with se.get(origin,
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    stream=True,
                    timeout=5) as pic_res:
            url = pic_res.url
            if url.split('/')[2] == 'exhentai.org':
                # Response was not redirected (302), so the view limit is reached
                raise exception.LimitationReachedError(page)
            # rstrip() strips a character set, so split off the '?dl=1' query instead
            file_name = os.path.split(url)[-1].split('?')[0]
            if rename:
                file_name = str(page) + os.path.splitext(file_name)[1]
            real_path = os.path.join(folder_path, file_name)
            # Skip the file if it already exists and rewrite is off
            if not os.path.exists(real_path) or rewrite:
                if os.path.exists(real_path):
                    os.remove(real_path)
                print('Downloading page {0} to {1}'.format(page, real_path))
                with open(real_path, 'ab') as data:
                    for chunk in pic_res.iter_content():
                        data.write(chunk)
            else:
                print('Skip:', file_name)
    except requests.Timeout:
        raise requests.Timeout('Download: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Download: ' + repr(e))
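
Putting the exhentai pieces together: a sequential driver sketch, under the same hypothetical-module assumption, that logs in, resolves the keys, and downloads pages until the view limit is hit:

import requests

import crawler
import exception

se = requests.Session()
proxy = {}
crawler.login(se, proxy, 'username', 'password')

addr = 'https://exhentai.org/g/<gid>/<token>/'  # Placeholder address
info = crawler.information(se, proxy, addr)
keys = crawler.fetch_keys(se, proxy, info)

for page in range(1, int(info['page']) + 1):
    try:
        crawler.download(se, proxy, info, keys, page, '/tmp/galleries',
                         rename=True)
    except exception.LimitationReachedError:
        print('View limit reached at page', page)
        break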
Example #9
def get_new(se, proxy: dict = None, num: int = 0, user_id: str = None) -> set:
    """
    Get new items of following or specified user.
    Args:
        se: Session instance.
        proxy: (optional) the proxy used.
        num: (optional when user_id is specified) the number of illustrations
            to download. If user_id is specified and num is omitted, all of
            the user's illustrations will be downloaded.
        user_id: (optional) the id of the target user. If not given, new
            illustrations will be fetched from following.
    Return:
        A set of pixiv ids fetched.
    """
    try:
        item_dic = {}
        if user_id:  # Fetch user's new illustration
            with se.get(_USER_URL + user_id + '/profile/all',
                        headers={'User-Agent': random.choice(misc.USER_AGENT)},
                        proxies=proxy,
                        timeout=5) as user_res:
                user_json = json.loads(user_res.text)
            if user_json['error']:
                raise exception.ResponseError(user_json['message'] +
                                              '(user pic)')
            user_json = user_json['body']
            # Combine illustrations and manga into one dict
            if user_json['manga'] and user_json['illusts']:
                item_dic = {**user_json['illusts'], **user_json['manga']}
            else:
                item_dic = user_json['manga'] or user_json['illusts']

        else:  # Fetch following's new illustration
            if num // 20 + 1 > 100:  # The page count is capped at 100
                pn = 100
            else:
                pn = num // 20 + 1 if num else 0
            for p in range(pn):
                with se.get(
                        _ROOT_URL + 'bookmark_new_illust.php',
                        params={'p': str(p + 1)},
                        headers={'User-Agent': random.choice(misc.USER_AGENT)},
                        proxies=proxy,
                        timeout=5) as new_res:
                    new_html = BeautifulSoup(new_res.text, 'lxml')
                new_node = new_html.find(id='js-mount-point-latest-following')
                if not new_node:
                    raise exception.ResponseError(
                        'Cannot fetch new following items.')
                p_json = json.loads(new_node['data-items'])
                item_dic.update({item['illustId']: None for item in p_json})

        item_set = set()
        for item in item_dic:
            item_set.add(item)
            if len(item_set) == num:
                return item_set
        return item_set

    except requests.Timeout:
        raise requests.Timeout('Timeout during getting new items.')
    except exception.ResponseError:
        raise
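
And the pixiv side end to end: a sketch that identifies the logged-in user, pulls the newest ids from followed users, and resolves each id to its detail dict:

uid, uname = crawler.get_user(se, proxy)
print('Logged in as {0} (id {1})'.format(uname, uid))

for pid in crawler.get_new(se, proxy, num=20):
    detail = crawler.get_detail(se, pid, proxy)
    print(detail['createDate'], detail['illustId'], detail['illustTitle'])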