def wca_url_request_handler(self, target_url, post_data, timeout, target_page_word, logpath):
    """Universal URL request handler

    @@API that allows external calls
    If logging is not needed, set the log path to None
    :param target_url: target request url
    :param post_data: POST data
    :param timeout: request timeout, 30s is suggested
    :param target_page_word: target page symbol word
    :param logpath: log save path
    :return: raw request response
    """
    try:
        response = self.opener.open(fullurl=target_url, data=post_data, timeout=timeout)
    except Exception as e:
        log_content = dl.BR_CB('%s response failed, error: %s' % (target_page_word, str(e)))
        self.wca_logprowork(logpath, log_content)
        exit(dl.PUB_E_RESPONSE_FAIL)  # unrecoverable error, exit directly

    if response.getcode() == dl.HTTP_REP_OK_CODE:
        log_content = target_page_word + ' response ok'
    else:
        log_content = dl.BR_CB(target_page_word + ' return code %d' % response.getcode())
    self.wca_logprowork(logpath, log_content)
    return response
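# Minimal standalone sketch of the request/status-check pattern wrapped by
# wca_url_request_handler, using only the standard library. The bare opener,
# the literal 200 check and the helper name are illustrative assumptions,
# not the crawler's dl constants or logging helpers.
import urllib.request


def _sketch_simple_request(target_url, post_data=None, timeout=30):
    """Open a URL and report its HTTP status, mirroring the handler above."""
    opener = urllib.request.build_opener()
    response = opener.open(target_url, data=post_data, timeout=timeout)
    if response.getcode() == 200:  # dl.HTTP_REP_OK_CODE is assumed to be 200
        print('response ok')
    else:
        print('return code %d' % response.getcode())
    return response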
def wca_camouflage_login(self):
    """Camouflage the browser to login

    If login fails, the program exits here
    @@API that allows external calls
    :return: status code
    """
    if WkvCwApi._login_once_flag:
        return dl.PUB_E_OK
    else:
        WkvCwApi._login_once_flag = True

    if self._gatherpostkey() != dl.PUB_E_OK:
        exit(dl.PUB_E_RESPONSE_FAIL)

    cookie_jar = self._get_chrome_cookie(dl.local_cache_cookie_path, dl.HTTPS_HOST_URL)
    self.cookieHandler = urllib.request.HTTPCookieProcessor(cookie_jar)
    self.opener = urllib.request.build_opener(self.cookieHandler)
    urllib.request.install_opener(self.opener)

    response = self.wca_url_request_handler(
        target_url=dl.LOGIN_REQUEST_API_URL,
        post_data=self.postway_data,
        timeout=30,
        target_page_word='login',
        logpath=None)
    if response == dl.PUB_E_RESPONSE_FAIL:
        dl.LT_PRINT(dl.BR_CB('login response returned a boolean FALSE, exit'))
        exit(dl.PUB_E_RESPONSE_FAIL)

    web_src = response.read().decode("UTF-8", "ignore")
    dl.LT_PRINT(dl.BY_CB('response source: %s' % web_src.encode("UTF-8").decode("unicode_escape")))

    login_info_pattern = re.compile(dl.LOGIN_INFO_REGEX, re.S)
    response_info = re.findall(login_info_pattern, web_src)
    if response_info:
        # an error field of 'false' means no error occurred
        if response_info[0] != 'false':
            dl.LT_PRINT(dl.BR_CB('login confirm raised an error, exit'))
            exit(dl.PUB_E_RESPONSE_FAIL)
        else:
            dl.LT_PRINT('login check response right')
    else:
        dl.LT_PRINT('login confirm response carries no error status')
        exit(dl.PUB_E_RESPONSE_FAIL)
    return dl.PUB_E_OK
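# Standalone sketch of the cookie-backed opener set up in wca_camouflage_login.
# It uses an empty in-memory CookieJar instead of the Chrome cookie store read
# by _get_chrome_cookie; purely illustrative, not the crawler's login flow.
import http.cookiejar
import urllib.request


def _sketch_build_cookie_opener():
    """Build and install a urllib opener that carries cookies across requests."""
    cookie_jar = http.cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(cookie_handler)
    urllib.request.install_opener(opener)  # later urlopen() calls reuse the same jar
    return opener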
def rtn_gather_essential_info(page_opt, whole_nbr):
    """Get the requested image count

    If the user asks for more than the whole number, the target count is
    clamped to the whole number
    Only interactive mode calls this function
    :param page_opt: select ranktop ordinary or r18 mode
    :param whole_nbr: whole ranking crawl count
    :return: crawl images count
    """
    img_cnt = 0
    if page_opt == dl.PAGE_ORDINARY:
        label = 'ordinary'
    elif page_opt == dl.PAGE_R18:
        label = 'r18'
    elif page_opt == dl.PAGE_R18G:
        label = 'r18g'
    else:
        dl.nolog_raise_arguerr()
        return dl.PUB_E_PARAM_FAIL

    img_str = dl.LT_INPUT(dl.HL_CY('crawl %s valid target %d, enter the count you want: ' % (label, whole_nbr)))
    while not img_str.isdigit():
        img_str = dl.LT_INPUT(dl.HL_CY('input error, enter again(max is %d): ' % whole_nbr))
    img_cnt = int(img_str)
    if img_cnt <= 0:
        dl.LT_PRINT(dl.BR_CB('what the f**k is wrong with you?'))
        return dl.PUB_E_PARAM_FAIL
    if img_cnt > whole_nbr:
        img_cnt = whole_nbr
    return img_cnt
def wca_commit_spansizer(whole_pattern, info_pattern, web_src):
    """A sizer for all images in one commit item

    @@API that allows external calls
    After the Pixiv 20181002 update, this method only supports mode rtn
    :param whole_pattern: whole info data regex compile pattern
    :param info_pattern: image info regex compile pattern
    :param web_src: webpage source
    :return: dict of original target url list & image info list
    """
    img_info_lst = []
    tgt_url_lst = []
    datasrc_pattern = re.compile(dl.DATASRC_REGEX, re.S)
    span_pattern = re.compile(dl.SPAN_REGEX, re.S)
    img_whole_info = re.findall(whole_pattern, web_src)

    # images come in 3 formats: jpg/png/gif
    # this crawler gives up the gif format and only crawls png or jpg
    # one pixiv repository may hold multiple images
    for item in img_whole_info:
        tmp_thumbnail = re.findall(datasrc_pattern, item)
        if not tmp_thumbnail:
            dl.LT_PRINT(dl.BR_CB('span sizer regex cannot get valid info'))
            return dl.PUB_E_FAIL
        thumbnail = tmp_thumbnail[0]
        judge_word = thumbnail[-18:]

        # check jpg/png or gif
        if judge_word == dl.JUDGE_NOGIF_WORD:
            span_word = re.findall(span_pattern, item)
            valid_word = thumbnail[44:-18]
            # try to check multi-span images
            if len(span_word) != 0:
                for _px in range(int(span_word[0])):
                    info = re.findall(info_pattern, item)[0]
                    img_info_lst.append(info)
                    # more pages point, range 0~span-1
                    target_url = dl.ORIGINAL_IMAGE_HEAD + valid_word + dl.ORIGINAL_IMAGE_TAIL(_px)
                    tgt_url_lst.append(target_url)
            # only one picture in this commit
            else:
                info = re.findall(info_pattern, item)[0]
                img_info_lst.append(info)
                # only the _p0 page
                target_url = dl.ORIGINAL_IMAGE_HEAD + valid_word + dl.ORIGINAL_IMAGE_TAIL(0)
                tgt_url_lst.append(target_url)
        # give up the gif format, or the list is empty
        else:
            pass

    return {'url lst': tgt_url_lst, 'info lst': img_info_lst}
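# Minimal sketch of the multi-page URL expansion done by wca_commit_spansizer:
# a span count of N yields N original-image URLs _p0.._p(N-1). The URL head,
# tail format and sample path below are made-up placeholders, not the real
# dl.ORIGINAL_IMAGE_HEAD / dl.ORIGINAL_IMAGE_TAIL values.
def _sketch_expand_span_urls(valid_word, span_count,
                             head='https://example.invalid/img-original/img/',
                             tail='_p%d.jpg'):
    """Return one original-image URL per page of a multi-page commit."""
    return [head + valid_word + (tail % px) for px in range(span_count)]


# _sketch_expand_span_urls('2021/01/01/00/00/00/12345678', 3) ->
#   ['.../12345678_p0.jpg', '.../12345678_p1.jpg', '.../12345678_p2.jpg']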
def rtn_gather_rankingdata(self):
    """Crawl the daily ranking list

    :return: status code
    """
    response = self.wkv_cw_api.wca_url_request_handler(target_url=self.rtn_req_url,
                                                       post_data=self.wkv_cw_api.getway_data,
                                                       timeout=30,
                                                       target_page_word='rankpage',
                                                       logpath=self.logpath)

    # size info is in the webpage source
    web_src = response.read().decode("UTF-8", "ignore")
    imgitem_pattern = re.compile(dl.RANKING_SECTION_REGEX, re.S)
    info_pattern = re.compile(dl.RANKING_INFO_REGEX, re.S)
    sizer_result = self.wkv_cw_api.wca_commit_spansizer(imgitem_pattern, info_pattern, web_src)
    if sizer_result == dl.PUB_E_FAIL:
        return dl.PUB_E_FAIL
    url_lst = sizer_result['url lst']
    img_info_lst = sizer_result['info lst']

    # cut the needed image count into the target list
    valid_url_cnt = len(url_lst)
    if self.ir_mode == dl.MODE_INTERACTIVE:
        img_nbr = self.rtn_gather_essential_info(self.page_opt, valid_url_cnt)
        if img_nbr == dl.PUB_E_PARAM_FAIL:
            return dl.PUB_E_FAIL
    elif self.ir_mode == dl.MODE_SERVER:
        # server mode directly takes all alive targets
        img_nbr = valid_url_cnt
        dl.LT_PRINT(dl.BY_CB('server mode auto crawl all of alive targets'))

    self.rtn_target_urls = url_lst[:img_nbr]
    log_content = dl.BY_CB('crawl ranking top ' + str(img_nbr) + ', target table:')
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)

    image_info_table = PrettyTable(
        ["ImageNumber", "ImageID", "ImageTitle", "ImageID+PageNumber", "AuthorID", "AuthorName"])
    for k, i in enumerate(img_info_lst[:img_nbr]):
        self.rtn_basepages.append(dl.BASEPAGE_URL(i[3]))  # used by the url request header
        image_info_table.add_row([(k + 1), i[3], i[1],
                                  dl.FROM_URL_GET_IMG_NAME(self.rtn_target_urls[k]), i[4], i[2]])
    # emoji in titles may make the dump fail
    try:
        self.wkv_cw_api.wca_logprowork(self.logpath, str(image_info_table), False)
    except Exception as e:
        dl.LT_PRINT(dl.BR_CB('error: %s, dump prettytable interrupt' % str(e)))
    return dl.PUB_E_OK
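# Small sketch of how the target table above is assembled with prettytable
# (https://pypi.org/project/prettytable/); the column names and rows here are
# invented sample data, the real ones come from the ranking regex results.
from prettytable import PrettyTable


def _sketch_build_target_table():
    table = PrettyTable(["ImageNumber", "ImageID", "ImageTitle"])
    table.add_row([1, "10000001", "sample title"])
    table.add_row([2, "10000002", "another title"])
    return str(table)  # str(table) is what gets written to the log file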
def create(self):
    """Create a new thread

    Use a built-in queue to manage the thread list
    :return: status flag
    """
    self.lock_t.acquire()
    self.queue_t.append(self)
    self.lock_t.release()

    # if the system has insufficient memory,
    # it will not be able to create more threads
    # and this step will fail
    try:
        self.start()
    except Exception as e:
        log_content = dl.BR_CB("Error type: " + str(e))
        WkvCwApi.wca_logprowork(self.logpath, log_content)
        return dl.PUB_E_FAIL
    return dl.PUB_E_OK
def run(self):
    """Override the threading.Thread run() method

    :return: none
    """
    try:
        # package the download of one image into this thread
        # default to server mode here (it actually doesn't matter)
        WkvCwApi(2)._save_oneimage(self.index, self.url, self.basepages, self.workdir, self.logpath)
    except Exception as e:
        log_content = dl.BR_CB("Error type: " + str(e))
        WkvCwApi.wca_logprowork(self.logpath, log_content)

    self.lock_t.acquire()       # thread queue adjust, lock it
    self.queue_t.remove(self)   # remove the finished thread from the list
    if len(self.queue_t) == dl.SYSTEM_MAX_THREADS - 1:
        self.event_t.set()
        self.event_t.clear()
    self.lock_t.release()
def wca_download_alltarget(self, logpath, urls, basepages, workdir):
    """Multi-threaded download of all target images

    @@API that allows external calls
    :param logpath: log save path
    :param urls: all original image urls
    :param basepages: all referer base pages
    :param workdir: work directory
    :return: none
    """
    thread_block_flag = False  # thread blocking flag
    alive_thread_cnt = queueLength = len(urls)
    log_content = dl.BY_CB('hit %d target(s), start download task(s)' % queueLength)
    self.wca_logprowork(logpath, log_content)

    # capture timeouts and user interrupts and exit the failed thread
    try:
        for i, one_url in enumerate(urls):
            self._MultiThreading.lock_t.acquire()
            if len(self._MultiThreading.queue_t) > dl.SYSTEM_MAX_THREADS:
                thread_block_flag = True
                self._MultiThreading.lock_t.release()
                # if the number of created threads reaches the max limit,
                # the program stops here and waits until a running thread
                # finishes before creating the next one
                self._MultiThreading.event_t.wait()
            else:
                self._MultiThreading.lock_t.release()

            # build an overridden threading.Thread object
            sub_thread = self._MultiThreading(i, one_url, basepages, workdir, logpath)
            # set the daemon property of every download sub-thread:
            # False: exiting one thread does not end the others
            # True: quitting one quits all
            sub_thread.daemon = True
            # creating this sub-thread may fail inside create()
            if sub_thread.create() == dl.PUB_E_FAIL:
                log_content = dl.BR_CB('create a new sub-thread failed')
                print(log_content)
                return dl.PUB_E_FAIL

            if not thread_block_flag:
                log_content = dl.BY_CB('created {:d} download target object(s)')
            else:
                log_content = dl.BY_CB(
                    'created {:d} download target object(s), thread creation is blocked, please wait')
            dl.LT_FLUSH(log_content, i + 1)
        print(dl.BY_CB(', all threads have been loaded OK'))
        thread_block_flag = False

        # the parent thread waits for all sub-threads to end
        # the total thread count is 1 parent thread plus n sub-thread(s)
        # when all pictures have been downloaded, the thread count is 1
        while alive_thread_cnt > 1:
            # global variable update
            self.alivethread_counter = threading.active_count()
            # when the alive thread count changes, print its value
            if alive_thread_cnt != self.alivethread_counter:
                alive_thread_cnt = self.alivethread_counter  # update alive thread count
                # display the alive sub-thread count,
                # it will not exceed the thread max count
                log_content = dl.BY_CB(
                    'currently remaining sub-thread(s):({:4d}/{:4d}), completed:({:4.1%})|({:5.2f}MB)')
                dl.LT_FLUSH(log_content, alive_thread_cnt - 1, queueLength,
                            ((queueLength - (alive_thread_cnt - 1)) / queueLength),
                            (float(WkvCwApi._datastream_pool / 1024)))
        print(dl.BY_CB(', sub-threads execute finished'))
    except KeyboardInterrupt:
        print(dl.BY_CB(', user interrupt a thread, exit all threads'))
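# Standalone sketch of the throttling pattern used above: a shared queue guarded
# by a Lock caps the number of live worker threads, and an Event wakes the
# producer when a slot frees up. MAX_THREADS and the sleep-based worker body are
# demo assumptions, not the crawler's dl.SYSTEM_MAX_THREADS or download code.
import threading
import time

MAX_THREADS = 4
_queue = []
_lock = threading.Lock()
_event = threading.Event()


def _worker():
    time.sleep(0.1)  # stand-in for one image download
    with _lock:
        _queue.remove(threading.current_thread())
        if len(_queue) == MAX_THREADS - 1:
            _event.set()
            _event.clear()


def _sketch_throttled_spawn(task_count):
    for _ in range(task_count):
        with _lock:
            blocked = len(_queue) > MAX_THREADS
        if blocked:
            _event.wait(timeout=1)  # bounded wait until a worker slot frees up
        t = threading.Thread(target=_worker, daemon=True)
        with _lock:
            _queue.append(t)
        t.start()
    # wait for the remaining workers so the demo does not exit early
    while threading.active_count() > 1:
        time.sleep(0.05)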
def rtn_target_confirm(self):
    """Input options and confirm the target

    :return: status code
    """
    req_url = None      # request target ranking url
    rank_word = None    # ranking word
    dwm_opt = None      # daily/weekly/monthly

    if self.ir_mode == dl.MODE_INTERACTIVE:
        page_opt = dl.LT_INPUT(dl.HL_CY('select ranking type, ordinary(1) | r18(2) | r18g(3): '))
        sex_opt = dl.LT_INPUT(dl.HL_CY('select sex favor, normal(0) | male(1) | female(2): '))
    elif self.ir_mode == dl.MODE_SERVER:
        page_opt = self.rtn_r18_arg
        sex_opt = self.rtn_sex_opt
    else:
        dl.nolog_raise_arguerr()
        return dl.PUB_E_PARAM_FAIL

    if page_opt == dl.PAGE_ORDINARY:
        if self.ir_mode == dl.MODE_INTERACTIVE:
            dwm_opt = dl.LT_INPUT(dl.HL_CY('select daily(1) | weekly(2) | monthly(3) ordinary ranking type: '))
        elif self.ir_mode == dl.MODE_SERVER:
            dwm_opt = self.rtn_rank_type
        else:
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL

        if dwm_opt == dl.RANK_DAILY:
            if sex_opt == dl.SEX_NORMAL:
                req_url = dl.RANK_DAILY_URL
                rank_word = dl.DAILY_WORD
            elif sex_opt == dl.SEX_MALE:
                req_url = dl.RANK_DAILY_MALE_URL
                rank_word = dl.MALE_WORD
            elif sex_opt == dl.SEX_FEMALE:
                req_url = dl.RANK_DAILY_FEMALE_URL
                rank_word = dl.FEMALE_WORD
            else:
                dl.nolog_raise_arguerr()
                return dl.PUB_E_PARAM_FAIL
        elif dwm_opt == dl.RANK_WEEKLY:
            req_url = dl.RANK_WEEKLY_URL
            rank_word = dl.WEEKLY_WORD
        elif dwm_opt == dl.RANK_MONTHLY:
            req_url = dl.RANK_MONTHLY_URL
            rank_word = dl.MONTHLY_WORD
        else:
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL
    elif page_opt == dl.PAGE_R18:
        if self.ir_mode == dl.MODE_INTERACTIVE:
            dwm_opt = dl.LT_INPUT(dl.HL_CY('select daily(1)/weekly(2) R18 ranking type: '))
        elif self.ir_mode == dl.MODE_SERVER:
            dwm_opt = self.rtn_rank_type
        else:
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL

        if dwm_opt == dl.RANK_DAILY:
            if sex_opt == dl.SEX_NORMAL:
                req_url = dl.RANK_DAILY_R18_URL
                rank_word = dl.DAILY_WORD
            elif sex_opt == dl.SEX_MALE:
                req_url = dl.RANK_DAILY_MALE_R18_URL
                rank_word = dl.MALE_WORD
            elif sex_opt == dl.SEX_FEMALE:
                req_url = dl.RANK_DAILY_FEMALE_R18_URL
                rank_word = dl.FEMALE_WORD
            else:
                dl.nolog_raise_arguerr()
                return dl.PUB_E_PARAM_FAIL
        elif dwm_opt == dl.RANK_WEEKLY:
            req_url = dl.RANK_WEEKLY_R18_URL
            rank_word = dl.WEEKLY_WORD
        else:
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL
    elif page_opt == dl.PAGE_R18G:
        req_url = dl.RANK_R18G_URL
        rank_word = dl.R18G_WORD
        dl.LT_PRINT(dl.BR_CB('warning: you chose the r18g rank, hope you know what it means'))
    else:
        dl.nolog_raise_arguerr()
        return dl.PUB_E_PARAM_FAIL

    log_content = dl.BY_CB('based on the selected options, set rank target url: [%s]' % req_url)
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
    self.rtn_req_url = req_url
    self.page_opt = page_opt
    return dl.PUB_E_OK
def ira_crawl_allpage_target(self):
    """Package all gathered urls

    :return: status code
    """
    require_page_cnt = 0
    if self.ira_max_cnt <= dl.ONE_PAGE_COMMIT:
        require_page_cnt = 1
    else:
        require_page_cnt = int(self.ira_max_cnt / dl.ONE_PAGE_COMMIT)
        # remainder decision
        if self.ira_max_cnt % dl.ONE_PAGE_COMMIT != 0:
            require_page_cnt += 1

    # build the json data url
    iid_string_tail = ''
    subpage_url_list = []
    for ix in range(require_page_cnt):
        # one subpage only includes 6*8 valid images, the others are invalid
        tmp_tail_nbr = dl.ONE_PAGE_COMMIT * (ix + 1)
        tmp_tail_nbr = self.ira_max_cnt if tmp_tail_nbr > self.ira_max_cnt else tmp_tail_nbr
        for index in self.ira_pure_idlist[(dl.ONE_PAGE_COMMIT * ix):tmp_tail_nbr]:
            iid_string_tail += dl.IDS_UNIT(index)
        subpage_url_list.append(dl.ALLREPOINFO_URL(self.user_input_id, iid_string_tail, 1 if ix == 0 else 0))
        iid_string_tail = ''  # clear the last cache

    # get all data from the response xhr page into a temp list
    tmp_receive_list = []
    tmp_ret = []
    for i in range(require_page_cnt):
        tmp_ret = self.ira_crawl_subpage_data(i + 1, subpage_url_list[i])
        if not isinstance(tmp_ret, list):
            return dl.PUB_E_FAIL
        tmp_receive_list += tmp_ret

    repo_target_all_list = []
    for i in range(len(tmp_receive_list)):
        tmp_receive_list[i][1] = dl.UNICODE_ESCAPE(tmp_receive_list[i][1])
        tmp_receive_list[i][1] = dl.EMOJI_REPLACE(tmp_receive_list[i][1])
        # build the original url without the image format
        tmp = tmp_receive_list[i][2]
        tmp = tmp.replace('\\', '')
        tmp_receive_list[i][2] = dl.ORIGINAL_IMAGE_HEAD + tmp[-39:-7] + '.png'
        # first original url
        repo_target_all_list.append(tmp_receive_list[i])

        # add the other original image urls by pageCount
        tmp_page_count_str = tmp_receive_list[i][3]
        if tmp_page_count_str.isdigit():
            index_page_count = int(tmp_page_count_str)
            if index_page_count != 1:
                for px in range(index_page_count):
                    insert_item = [tmp_receive_list[i][0],
                                   tmp_receive_list[i][1],
                                   tmp_receive_list[i][2][:-5] + str(px) + '.png',
                                   tmp_receive_list[i][3]]
                    repo_target_all_list.append(insert_item)
        else:
            log_content = dl.BR_CB('page count process error')
            self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
            return dl.PUB_E_FAIL
    del tmp_receive_list

    alive_target_cnt = len(repo_target_all_list)
    require_img_nbr = 0
    if self.ir_mode == dl.MODE_INTERACTIVE:
        require_img_str = dl.LT_INPUT(dl.HL_CY('crawl all repo %d, whole target(s): %d, enter the count you want: '
                                               % (self.ira_max_cnt, alive_target_cnt)))
        # the user input is not a number
        while not require_img_str.isdigit():
            dl.LT_PRINT(dl.BR_CB('input error, your input content was not a decimal number'))
            require_img_str = dl.LT_INPUT(dl.HL_CY('enter again(max is %d): ' % alive_target_cnt))
        require_img_nbr = int(require_img_str)
        if require_img_nbr <= 0:
            dl.LT_PRINT(dl.BR_CB('what the f**k is wrong with you?'))
            return dl.PUB_E_PARAM_FAIL
        require_img_nbr = alive_target_cnt if require_img_nbr > alive_target_cnt else require_img_nbr
    elif self.ir_mode == dl.MODE_SERVER:
        require_img_nbr = alive_target_cnt
        dl.LT_PRINT(dl.BY_CB('server mode auto crawl all of alive targets'))
    else:
        pass

    for k, i in enumerate(repo_target_all_list[:require_img_nbr]):
        self.ira_target_capture.append(i[2])
        self.ira_basepages.append(dl.BASEPAGE_URL(i[0]))

    log_content = 'illustrator [%s] id [%s], require image(s): %d, target table:' \
                  % (self.ira_author_name, self.user_input_id, require_img_nbr)
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)

    image_info_table = PrettyTable(["ImageNumber", "ImageID", "ImageTitle", "ImagePageName"])
    for k, i in enumerate(repo_target_all_list[:require_img_nbr]):
        image_info_table.add_row([(k + 1), i[0], i[1], dl.FROM_URL_GET_IMG_NAME(i[2])])
    # emoji in titles may make the dump fail
    try:
        self.wkv_cw_api.wca_logprowork(self.logpath, str(image_info_table), False)
    except Exception as e:
        dl.LT_PRINT(dl.BR_CB('error: %s, dump prettytable interrupt' % str(e)))
    del repo_target_all_list
    return dl.PUB_E_OK
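# Tiny sketch of the page-count math at the top of ira_crawl_allpage_target:
# require_page_cnt is just the ceiling of max_cnt / ONE_PAGE_COMMIT. The
# per-page size of 48 (6*8) is inferred from the comment above and is an
# assumption for this demo, not the actual dl.ONE_PAGE_COMMIT value.
import math


def _sketch_require_page_cnt(max_cnt, one_page_commit=48):
    """Ceiling division, equivalent to the if/else + remainder check above."""
    return max(1, math.ceil(max_cnt / one_page_commit))


# _sketch_require_page_cnt(48) -> 1, _sketch_require_page_cnt(49) -> 2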
def ira_crawl_subpage_data(self, index, index_url):
    """Crawl all data of one subpage

    :param index: request page index
    :param index_url: index group url
    :return: one page info list (2-d)
    """
    response = self.wkv_cw_api.wca_url_request_handler(target_url=index_url,
                                                       post_data=self.wkv_cw_api.getway_data,
                                                       timeout=30,
                                                       target_page_word='subpage %d' % index,
                                                       logpath=self.logpath)
    # 20181002 event effect: cannot get the web source, this web_src is raw json data returned by the server
    web_src = response.read().decode("UTF-8", "ignore")
    ## self.wkv_cw_api.wca_save_test_html('all-repo', 'E:\\OperationCache', web_src)

    error_status_pattern = re.compile(dl.PAGE_REQUEST_SYM_REGEX, re.S)
    error_status_list = re.findall(error_status_pattern, web_src)
    if not error_status_list:
        log_content = dl.BR_CB('regex get error status failed')
        self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
        return dl.PUB_E_REGEX_FAIL
    error_status = error_status_list[0]
    if error_status == 'true':
        log_content = dl.BR_CB('subpage %d response failed' % index)
        self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
        return dl.PUB_E_RESPONSE_FAIL

    # crawl the item info of one page
    page_target_pattern = re.compile(dl.PAGE_TGT_INFO_SQUARE_REGEX, re.S)
    page_tgt_info_tpe = re.findall(page_target_pattern, web_src)
    if not page_tgt_info_tpe:
        log_content = dl.BR_CB('regex get target page info failed')
        self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
        return dl.PUB_E_REGEX_FAIL

    # transform the tuples to lists
    tmp_target_info_list = []
    for i in range(len(page_tgt_info_tpe)):
        tmp_target_info_list.append(list(page_tgt_info_tpe[i]))
    del page_tgt_info_tpe

    # check the artwork type, drop gif works
    tgt_info_comp_works = []
    illust_type_pattern = re.compile(dl.ILLUST_TYPE_REGEX, re.S)
    for k in range(len(tmp_target_info_list)):
        illust_type_sym = re.findall(illust_type_pattern, tmp_target_info_list[k][2])
        if len(illust_type_sym) == 0:
            log_content = dl.BR_CB('illust type process error')
            self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
            return dl.PUB_E_FAIL
        if illust_type_sym[0] == dl.GIF_TYPE_LABEL:
            continue
        # delete unuseful data
        del tmp_target_info_list[k][2]
        del tmp_target_info_list[k][-2]
        tgt_info_comp_works.append(tmp_target_info_list[k])
    del tmp_target_info_list

    return tgt_info_comp_works
def ira_gather_preloadinfo(self):
    """The crawler needs to know how many images you want

    This function also gets the author name based on the author id
    :return: status code
    """
    # request all artworks of one illustrator
    response = self.wkv_cw_api.wca_url_request_handler(target_url=dl.AJAX_ALL_URL(self.user_input_id),
                                                       post_data=self.wkv_cw_api.getway_data,
                                                       timeout=30,
                                                       target_page_word='ajaxpage',
                                                       logpath=self.logpath)

    # get the artwork id list
    web_src = response.read().decode("UTF-8", "ignore")
    ajax_idlist_pattern = re.compile(dl.AJAX_ALL_IDLIST_REGEX, re.S)
    ajax_idlist = re.findall(ajax_idlist_pattern, web_src)
    if not ajax_idlist:
        log_content = dl.BR_CB('regex get ajax id list failed')
        self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
        return dl.PUB_E_REGEX_FAIL

    number_pattern = re.compile(dl.NUMBER_REGEX, re.S)
    for index in ajax_idlist:
        if index.isdigit():
            self.ira_pure_idlist.append(index)
        else:
            # the id list result may include some garbage, use the number regex to get a pure result
            one_pure_id = re.findall(number_pattern, index)
            if one_pure_id:
                self.ira_pure_idlist.append(one_pure_id[0])
            else:
                pass

    # the website server requires the artwork id list sorted in descending order
    pure_idlist_nbr = []
    for index in self.ira_pure_idlist:
        pure_idlist_nbr.append(int(index))
    self.wkv_cw_api.wca_quick_sort(pure_idlist_nbr, 0, len(pure_idlist_nbr) - 1)
    self.ira_pure_idlist.clear()
    for index in reversed(pure_idlist_nbr):
        self.ira_pure_idlist.append(str(index))
    del pure_idlist_nbr
    self.ira_max_cnt = len(self.ira_pure_idlist)

    # get the author name from the member main page
    illust_mainpage_url = dl.USERS_ARTWORKS_URL(self.user_input_id)
    log_content = dl.HL_CY('crawl illustrator url: [%s]' % illust_mainpage_url)
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
    response = self.wkv_cw_api.wca_url_request_handler(target_url=illust_mainpage_url,
                                                       post_data=self.wkv_cw_api.getway_data,
                                                       timeout=30,
                                                       target_page_word='mainpage',
                                                       logpath=self.logpath)

    # match the illustrator name
    web_src = response.read().decode("UTF-8", "ignore")
    illust_name_pattern = re.compile(dl.ILLUST_NAME_REGEX(self.user_input_id), re.S)
    author_info = re.findall(illust_name_pattern, web_src)
    if not author_info:
        # the illustrator name cannot be caught in the main page if login failed
        dl.LT_PRINT(dl.BR_CB("Regex parsing result error, no author info"))
        return dl.PUB_E_REGEX_FAIL
    self.ira_author_name = author_info[0]
    log_content = dl.HL_CY('check illustrator: [%s]' % self.ira_author_name)
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
    return dl.PUB_E_OK
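# Small sketch of the descending id ordering performed above with
# wca_quick_sort + reversed(); Python's built-in sorted() gives the same result
# in one step. The sample ids in the comment are made up.
def _sketch_descending_ids(id_list):
    """Return the artwork id strings sorted from newest (largest) to oldest."""
    return [str(n) for n in sorted((int(i) for i in id_list), reverse=True)]


# _sketch_descending_ids(['100', '2500', '87']) -> ['2500', '100', '87']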