def login(se, proxy: dict, uid: str, pw: str) -> bool:
    """
    Login and set cookies for exhentai.

    Exceptions:
        globj.ValidationError: Raised when the username/password is wrong,
            or the account has no permission to access exhentai.
        globj.ResponseError: Raised when the server sends an abnormal response
            (including AttributeError).
    """
    try:
        with se.post(_LOGIN_URL,
                     params={'act': 'Login', 'CODE': '01'},
                     data={'CookieDate': '1', 'UserName': uid, 'PassWord': pw},
                     headers={'User-Agent': misc.USER_AGENT},
                     proxies=proxy,
                     timeout=5) as login_res:
            login_html = BeautifulSoup(login_res.text, 'lxml')
            se.cookies.update(login_res.cookies)  # Set cookies
            if login_html.head.title.string == 'Please stand by...':
                with se.get(_EXHENTAI_URL,
                            proxies=proxy,
                            headers={'User-Agent': misc.USER_AGENT},
                            timeout=5) as ex_res:
                    ex_html = BeautifulSoup(ex_res.text, 'lxml')
                    if ex_html.head.title.string == 'ExHentai.org':
                        se.cookies.update(ex_res.cookies)  # Set cookies for exhentai
                        return True
                    else:
                        raise exception.ValidationError('Login: Cannot get into exhentai.')
            elif login_html.head.title.string == 'Log In':
                raise exception.ValidationError('Login: Incorrect username or password.')
            else:
                raise exception.ResponseError('Login: Abnormal response.')
    except requests.Timeout:
        raise requests.Timeout('Login: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Login: ' + repr(e))
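# Usage sketch (illustrative only, not part of the module API): how login() is
# expected to be called. It assumes a plain requests.Session and the usual
# requests proxy dict format; the credentials and proxy below are placeholders.
def _example_login():
    se = requests.Session()
    proxy = {'https': 'http://127.0.0.1:1080'}  # Placeholder proxy in requests format
    if login(se, proxy, 'username', 'password'):
        print('Logged in, exhentai cookies are now set on the session.')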
def information(se, proxy: dict, addr: str) -> dict:
    """
    Fetch gallery information, including misc info and thumbnail.

    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        addr: Gallery address.
    Exceptions:
        globj.WrongAddressError: Raised when the gallery address is invalid.
        globj.ResponseError: Raised when the server sends an abnormal response.
    """
    re_thumb = re.compile(r'.*url\((.*)\).*')
    try:
        with se.get(addr,
                    params={'inline_set': 'ts_m'},
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    timeout=5) as gallery_res:
            gallery_html = BeautifulSoup(gallery_res.text, 'lxml')
            _ban_checker(gallery_html)
            if ('Gallery not found.' in gallery_html.body.get_text()
                    or 'Key missing' in gallery_html.body.get_text()):
                raise exception.WrongAddressError('Wrong address provided.')
            name: str = gallery_html.find('h1', id='gj').string  # Japanese name is preferred
            if not name:
                name = gallery_html.find('h1', id='gn').string
            info = gallery_html.find_all('td', class_='gdt2')
            thumb = re_thumb.match(gallery_html.find('div', id='gd1').div['style']).group(1)
            if name and info and thumb:
                return {
                    'addr': addr,
                    'name': name,
                    'size': info[4].string,
                    'page': info[5].string[:-6],
                    'thumb': thumb
                }
            else:
                raise exception.ResponseError('Information: Abnormal response.')
    except requests.Timeout:
        raise requests.Timeout('Information: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Information: ' + repr(e))
def fetch_keys(se, proxy: dict, info: dict) -> dict:
    """
    Fetch keys (imgkeys and showkey) from a gallery.

    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        info: Information of the gallery.
    Return:
        A dictionary: {'page': imgkey, '0': showkey}.
    Exceptions:
        globj.ResponseError: Raised when the server sends an abnormal response.
    """
    re_imgkey = re.compile(r'https://exhentai\.org/s/(\w{10})/\d*-(\d{1,4})')
    re_showkey = re.compile(r'[\S\s]*showkey="(\w{11})"[\S\s]*')
    gid = info['addr'].split('/')[-3]
    pn = int(info['page']) // 40 + 1  # range(0) has no element
    keys = dict()
    try:
        for p in range(pn):
            with se.get(info['addr'],
                        params={'inline_set': 'ts_m', 'p': p},
                        headers={'User-Agent': misc.USER_AGENT},
                        proxies=proxy,
                        timeout=5) as gallery_res:
                gallery_html = BeautifulSoup(gallery_res.text, 'lxml')
                _ban_checker(gallery_html)
                # Fetch imgkey from every picture
                pics = gallery_html.find_all('div', class_='gdtm')
                for item in pics:
                    match = re_imgkey.match(item.a['href'])
                    keys[match.group(2)] = match.group(1)
        # Fetch showkey from the first picture
        showkey_url = '/'.join(['https://exhentai.org/s', keys['1'], gid + '-1'])
        with se.get(showkey_url,
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    timeout=5) as showkey_res:
            showkey_html = BeautifulSoup(showkey_res.text, 'lxml')
            _ban_checker(showkey_html)
            keys['0'] = re_showkey.match(showkey_html('script')[1].string).group(1)
        return keys
    except requests.Timeout:
        raise requests.Timeout('Fetch_keys: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Fetch_keys: ' + repr(e))
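# Usage sketch (illustrative only): fetching gallery info and then its keys with
# an already logged-in session. The gallery address is a placeholder; note that
# 'page' in the returned info is a string, which is why fetch_keys() uses int().
def _example_fetch_keys(se, proxy):
    info = information(se, proxy, 'https://exhentai.org/g/123456/0123456789/')
    keys = fetch_keys(se, proxy, info)
    # keys maps page numbers (as strings) to imgkeys; the showkey is stored under '0'
    print(info['name'], info['page'], keys['0'])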
def account_info(se, proxy: dict) -> tuple:
    """
    Get download limitation (used/all).

    Exceptions:
        globj.ResponseError: Raised when the server sends an abnormal response.
    """
    try:
        with se.get(_ACCOUNT_URL,
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    timeout=5) as info_res:
            info_html = BeautifulSoup(info_res.text, 'lxml')
            _ban_checker(info_html)
            info_node = info_html.find('div', class_='homebox')
            if info_node:
                limit = info_node('strong')
                return limit[0].string, limit[1].string
            else:
                raise exception.ResponseError('Account_info: Abnormal response.')
    except requests.Timeout:
        raise requests.Timeout('Account_info: Timeout.')
def get_following(se, proxy: dict) -> dict:
    """Get the list of the logged-in user's following."""
    try:
        with se.get(_ROOT_URL + 'bookmark.php',
                    params={'type': 'user'},
                    headers={'User-Agent': random.choice(misc.USER_AGENT)},
                    proxies=proxy,
                    timeout=5) as fo_res:
            fo_html = BeautifulSoup(fo_res.text, 'lxml')
            fo_node = fo_html.find_all('div', class_='userdata')
            if not fo_node:
                raise exception.ResponseError('Cannot fetch following info.')
            fo_info = {ele.a['data-user_id']: ele.a['data-user_name'] for ele in fo_node}
            return fo_info
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting following info.')
    except exception.ResponseError:
        raise
def get_detail(se, pid: str, proxy: dict = None) -> dict:
    """
    Get detail of the specified illustration.

    Args:
        se: Session instance.
        pid: The id of the illustration.
        proxy: (Optional) The proxy used.
    Return:
        A dict containing the detail of the illustration.
    """
    re_thumb = re.compile(r'540x540_70')
    try:
        with se.get(_ILLUST_URL + pid,
                    headers={'User-Agent': random.choice(misc.USER_AGENT)},
                    proxies=proxy,
                    timeout=5) as item_detail:
            item_json = json.loads(item_detail.text)
            if item_json['error']:
                raise exception.ResponseError(item_json['message'] + '(illust detail)')
            item_json = item_json['body']
            create_date = item_json['createDate'].split('T')[0]
            return {
                'illustId': item_json['illustId'],
                'illustTitle': item_json['illustTitle'],
                'createDate': create_date,
                'url': item_json['urls']['original'],
                'thumb': re_thumb.sub('150x150', item_json['urls']['small']),
                'userId': item_json['userId'],
                'userName': item_json['userName'],
                'pageCount': item_json['pageCount']
            }
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting illust detail.')
    except exception.ResponseError:
        raise
def get_user(se, proxy: dict) -> tuple:
    """Get username and pixiv id."""
    try:
        with se.get(_ROOT_URL,
                    proxies=proxy,
                    timeout=5,
                    headers={
                        'Referer': 'https://www.pixiv.net/',
                        'User-Agent': random.choice(misc.USER_AGENT)
                    }) as user_res:
            user_info = re.findall(
                r'"userData":{"id":"(\d{1,10})","pixivId":"(.*)","name":"(.*)","profileImg":',
                user_res.text)
            if not user_info:
                raise exception.ResponseError('Cannot fetch user info.')
            user_id = user_info[0][0]
            user_name = user_info[0][2]
            return user_id, user_name
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting user info.')
    except exception.ResponseError:
        raise
def download(se, proxy: dict, info: dict, keys: dict, page: int, path: str,
             rename=False, rewrite=False):
    """
    Download one picture.

    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        info: Information of the gallery.
        keys: Keys, including imgkeys and showkey.
        page: Page number.
        path: Save root path.
        rename: If True, rename the file to its page number instead of its original name.
        rewrite: If True, overwrite an existing image instead of skipping it.
    Exceptions:
        globj.ResponseError: Raised when the server sends an abnormal response.
        globj.LimitationReachedError: Raised when the view limitation is reached.
    """
    gid = info['addr'].split('/')[-3]
    try:
        # Fetch the original url of the picture
        with se.post(_EXHENTAI_URL + 'api.php',
                     json={
                         'method': 'showpage',
                         'gid': int(gid),
                         'page': int(page),
                         'imgkey': keys[str(page)],
                         'showkey': keys['0']
                     },
                     headers={'User-Agent': misc.USER_AGENT},
                     proxies=proxy,
                     timeout=5) as dl_res:
            dl_json = dl_res.json()
            if dl_json.get('error'):  # Wrong imgkey or showkey
                raise exception.ResponseError('Download: ' + dl_json['error'])
            if dl_json.get('i3'):  # Check whether the view limitation is reached
                url_html = BeautifulSoup(dl_json['i3'], 'lxml')
                if url_html.a.img['src'] == 'https://exhentai.org/img/509.gif':
                    raise exception.LimitationReachedError(page)
            if dl_json.get('i7'):  # Original image
                url_html = BeautifulSoup(dl_json['i7'], 'lxml')
                origin = url_html.a['href']
            elif dl_json.get('i3'):  # Shown image is the original
                url_html = BeautifulSoup(dl_json['i3'], 'lxml')
                origin = url_html.a.img['src']
            else:
                raise exception.ResponseError('Download: Not enough elements.')

        folder_name = misc.name_verify(info['name'])
        folder_path = os.path.join(path, folder_name)
        try:  # Prevent threads from starting at the same time
            os.makedirs(folder_path)
            print('mkdir:', folder_path)
        except FileExistsError:
            pass

        with se.get(origin,
                    headers={'User-Agent': misc.USER_AGENT},
                    proxies=proxy,
                    stream=True,
                    timeout=5) as pic_res:
            url = pic_res.url
            if url.split('/')[2] == 'exhentai.org':
                # If the response cannot redirect (302), raise exception
                raise exception.LimitationReachedError(page)
            file_name = os.path.split(pic_res.url)[-1].rstrip('?dl=1')  # Get file name from url
            if rename:
                file_name = str(page) + os.path.splitext(file_name)[1]
            real_path = os.path.join(folder_path, file_name)
            if not os.path.exists(real_path) or rewrite:
                # If the file exists and rewrite is off, skip it
                if os.path.exists(real_path):
                    os.remove(real_path)
                print('Downloading page {0} to {1}'.format(page, real_path))
                with open(real_path, 'ab') as data:
                    for chunk in pic_res.iter_content():
                        data.write(chunk)
            else:
                print('Skip:', file_name)
    except requests.Timeout:
        raise requests.Timeout('Download: Timeout.')
    except AttributeError as e:
        raise exception.ResponseError('Download: ' + repr(e))
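# Usage sketch (illustrative only): downloading a whole gallery page by page with
# the info and keys fetched above. It stops early once the view limitation is
# reached; the save path defaults to the working directory and error handling is
# deliberately minimal here.
def _example_download_gallery(se, proxy, info, keys, path='.'):
    for page in range(1, int(info['page']) + 1):
        try:
            download(se, proxy, info, keys, page, path, rename=True)
        except exception.LimitationReachedError:
            print('View limitation reached at page', page)
            break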
def get_new(se, proxy: dict = None, num: int = 0, user_id: str = None) -> set:
    """
    Get new items of following or of a specified user.

    Args:
        se: Session instance.
        proxy: (Optional) The proxy used.
        num: (Optional when user_id is specified) The number of illustrations to be
            downloaded. If user_id is specified and num is omitted, all illustrations
            will be downloaded.
        user_id: (Optional) The id of the target user. If not given, new
            illustrations will be fetched from following.
    Return:
        A set of fetched pixiv ids.
    """
    try:
        item_dic = {}
        if user_id:  # Fetch the user's new illustrations
            with se.get(_USER_URL + user_id + '/profile/all',
                        headers={'User-Agent': random.choice(misc.USER_AGENT)},
                        proxies=proxy,
                        timeout=5) as user_res:
                user_json = json.loads(user_res.text)
                if user_json['error']:
                    raise exception.ResponseError(user_json['message'] + '(user pic)')
                user_json = user_json['body']
                if user_json['manga'] and user_json['illusts']:
                    # Combine illustrations and manga into one dict
                    item_dic = {**user_json['illusts'], **user_json['manga']}
                else:
                    item_dic = user_json['manga'] if user_json['manga'] else user_json['illusts']
        else:  # Fetch following's new illustrations
            if num // 20 + 1 > 100:  # The page number is limited to 100
                pn = 100
            else:
                pn = num // 20 + 1 if num else 0
            for p in range(pn):
                with se.get(_ROOT_URL + 'bookmark_new_illust.php',
                            params={'p': str(p + 1)},
                            headers={'User-Agent': random.choice(misc.USER_AGENT)},
                            proxies=proxy,
                            timeout=5) as new_res:
                    new_html = BeautifulSoup(new_res.text, 'lxml')
                    new_node = new_html.find(id='js-mount-point-latest-following')
                    if not new_node:
                        raise exception.ResponseError('Cannot fetch new following items.')
                    p_json = json.loads(new_node['data-items'])
                    item_dic.update({item['illustId']: None for item in p_json})
        item_set = set()
        for item in item_dic:
            item_set.add(item)
            if len(item_set) == num:
                return item_set
        return item_set
    except requests.Timeout:
        raise requests.Timeout('Timeout during getting new items.')
    except exception.ResponseError:
        raise
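# Usage sketch (illustrative only): pulling the newest illustrations from the
# logged-in user's following and resolving each id with get_detail(). The count
# below is arbitrary; get_new() returns pixiv ids as strings.
def _example_fetch_new(se, proxy):
    user_id, user_name = get_user(se, proxy)
    print('Logged in as', user_name, '({0})'.format(user_id))
    for pid in get_new(se, proxy, num=20):
        detail = get_detail(se, pid, proxy)
        print(detail['illustId'], detail['illustTitle'], detail['url'])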