def logprowork(log_path, log_content, withtime='y'):
    """Universal work log save

    @@API that allows external calls
    Notice: if the print functions here raise UnicodeEncodeError, the web
    page title probably contains emoji symbols that cannot be encoded when
    prettytable packages the title info
    :param log_path: log save path
    :param log_content: log save content
    :param withtime: default parameter, print and save with real time or not
    :return: none
    """
    # append content to the file with option 'a+'
    # written content may contain non-ASCII characters, e.g. Japanese
    log_file_ptr = open(log_path, 'a+', encoding='utf-8')
    # select whether to add the real-time word or not
    if withtime == 'y':
        dataload.logtime_print(log_content)
        # write the timestamped line to the log file
        log_file_ptr.write(
            dataload.realtime_logword(dataload.base_time) + log_content + '\n')
    else:
        print(log_content)
        log_file_ptr.write(log_content + '\n')
    log_file_ptr.close()

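# A standalone sketch of the timestamped append-log pattern used by
# logprowork(), assuming only the standard library; log_line() and the path
# below are hypothetical stand-ins for dataload.realtime_logword() and the
# crawler's real log path.
from datetime import datetime

def log_line(log_path, content, withtime=True):
    with open(log_path, 'a+', encoding='utf-8') as fp:
        if withtime:
            # prefix the entry with a wall-clock timestamp, then persist it
            stamp = datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ')
            print(stamp + content)
            fp.write(stamp + content + '\n')
        else:
            print(content)
            fp.write(content + '\n')

log_line('./demo_work.log', 'Start gathering ranking page')
log_line('./demo_work.log', 'raw table text here', withtime=False)
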
def gather_rankingdata(self, option):
    """Crawl the daily ranking list

    :param option: user-chosen option
    :return: none
    """
    response = self.pvmx.url_request_handler(
        target_url=option[0],
        post_data=self.pvmx.login_bias[2],
        timeout=30,
        target_page_word='Rankpage',
        need_log=True,
        log_path=self.logpath)
    # parse size info from the web page source
    web_src = response.read().decode("UTF-8", "ignore")
    imgitem_pattern = re.compile(dataload.RANKING_SECTION_REGEX, re.S)
    info_pattern = re.compile(dataload.RANKING_INFO_REGEX, re.S)
    sizer_result = self.pvmx.commit_spansizer(imgitem_pattern,
                                              info_pattern, web_src)
    # whole data cache pool
    whole_urls, img_infos = sizer_result[0], sizer_result[1]
    # cut the required image count down to the target list
    alive_targets = len(whole_urls)
    if self.ir_mode == 1:
        img_nbr = self.gather_essential_info(option[1], alive_targets)
    # server mode directly takes all of the alive targets
    elif self.ir_mode == 2:
        img_nbr = alive_targets
        dataload.logtime_print(
            'Server mode auto crawl all of alive targets')
    self.target_urls = whole_urls[:img_nbr]
    log_context = 'Gather ranking top ' + str(img_nbr) + ', target table:'
    self.pvmx.logprowork(self.logpath, log_context)
    # use prettytable to package the info list
    image_info_table = PrettyTable([
        "ImageNumber", "ImageID", "ImageTitle", "ImageID+PageNumber",
        "AuthorID", "AuthorName"
    ])
    for k, i in enumerate(img_infos[:img_nbr]):
        # basepage will be used as a request referer header
        self.basepages.append(dataload.BASEPAGE_URL + i[3])
        image_info_table.add_row([(k + 1), i[3], i[1],
                                  self.target_urls[k][57:-4], i[4], i[2]])
    # save the table without a time header word
    self.pvmx.logprowork(self.logpath, str(image_info_table), 'N')

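# A minimal sketch of the PrettyTable packaging used above, assuming the
# prettytable package is installed; the column names and rows here are
# hypothetical placeholders, not real crawl results.
from prettytable import PrettyTable

demo_table = PrettyTable(["ImageNumber", "ImageID", "ImageTitle"])
demo_table.add_row([1, "10000001", "sample title"])
demo_table.add_row([2, "10000002", "another title"])
# str(demo_table) renders the ASCII table, which logprowork() then appends
# to the log file without a timestamp ('N').
print(str(demo_table))
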
def _gatherpostkey(self):
    """POST way login needs a post-key

    :return: post way request data
    """
    # call the gather-login-data function
    self.login_bias = self._login_preload(dataload.LOGIN_AES_INI_PATH)
    response = self.url_request_handler(
        target_url=dataload.LOGIN_POSTKEY_URL,
        post_data=None,  # cannot set data when getting the post key
        timeout=30,
        target_page_word='POST-key',
        need_log=False,
        log_path='')
    # cookie check
    for item in self.cookie:
        log_context = ('Cookie: [name:' + item.name + '-value:' +
                       item.value + ']')
        dataload.logtime_print(log_context)
    # match the post key
    web_src = response.read().decode("UTF-8", "ignore")
    post_pattern = re.compile(dataload.POSTKEY_REGEX, re.S)
    postkey = re.findall(post_pattern, web_src)[0]
    log_context = 'Get post-key: ' + postkey
    dataload.logtime_print(log_context)
    # build the POST data with an ordered dictionary structure
    post_orderdict = OrderedDict()
    post_orderdict['pixiv_id'] = self.login_bias[0]
    post_orderdict['password'] = self.login_bias[1]
    post_orderdict['captcha'] = ""
    post_orderdict['g_recaptcha_response'] = ""
    post_orderdict['post_key'] = postkey
    post_orderdict['source'] = "pc"
    post_orderdict['ref'] = dataload.LOGIN_POSTDATA_REF
    post_orderdict['return_to'] = dataload.HTTPS_HOST_URL
    # url-encode to bytes, the same way as the GET way data
    postway_data = urllib.parse.urlencode(post_orderdict).encode("UTF-8")
    return postway_data

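# A minimal sketch of how the ordered POST body above is built and encoded,
# assuming only the standard library; the field values are hypothetical
# placeholders, not a real post-key or account.
import urllib.parse
from collections import OrderedDict

demo_post = OrderedDict()
demo_post['pixiv_id'] = 'user@example.com'
demo_post['password'] = 'example-password'
demo_post['post_key'] = '0123456789abcdef'
# urlencode() keeps the insertion order and encode() turns the query string
# into the bytes object that urllib openers expect as POST data.
demo_bytes = urllib.parse.urlencode(demo_post).encode('UTF-8')
print(demo_bytes)  # b'pixiv_id=user%40example.com&password=...'
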
def mkworkdir(self, log_path, folder):
    """Create a crawler work directory

    @@API that allows external calls
    :param log_path: log save path
    :param folder: folder create path
    :return: none
    """
    # create a folder to save pictures
    dataload.logtime_print('Crawler work directory setting: ' + folder)
    is_folder_existed = os.path.exists(folder)
    if not is_folder_existed:
        os.makedirs(folder)
        log_context = 'Create a new work folder'
    else:
        log_context = 'Target folder already exists'
    # remove the old log file
    if os.path.exists(log_path):
        os.remove(log_path)
    # this step creates a new log file and writes the first line
    self.logprowork(log_path, log_context)

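# A standalone sketch of the same directory/log bootstrap, assuming only the
# standard library; the paths and helper name are hypothetical.
# os.makedirs(..., exist_ok=True) collapses the exists()/makedirs() pair
# used above into one call.
import os

def setup_workdir(folder, log_path):
    os.makedirs(folder, exist_ok=True)   # create the folder tree if missing
    if os.path.exists(log_path):
        os.remove(log_path)              # drop the previous run's log
    return folder

setup_workdir('./demo_rank_dir', './demo_rank_dir/work.log')
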
def target_confirm(self):
    """Input option and confirm the target

    :return: request mainpage url, mode
    """
    rank_word, req_url = None, None
    if self.ir_mode == 1:
        log_context = 'Gather ranking list======>'
        self.pvmx.logprowork(self.logpath, log_context)
        ormode = dataload.logtime_input(
            'Select ranking type, ordinary(o|1) or r18(r|2): ')
    elif self.ir_mode == 2:
        ormode = self.rtn_r18_arg
    if ormode == 'o' or ormode == '1':
        if self.ir_mode == 1:
            dwm = dataload.logtime_input(
                'Select daily(1) | weekly(2) | monthly(3) ordinary ranking type: ')
        elif self.ir_mode == 2:
            dwm = self.rtn_rank_type
        if dwm == '1':
            req_url = dataload.DAILY_RANKING_URL
            rank_word = dataload.DAILY_WORD
        elif dwm == '2':
            req_url = dataload.WEEKLY_RANKING_URL
            rank_word = dataload.WEEKLY_WORD
        elif dwm == '3':
            req_url = dataload.MONTHLY_RANKING_URL
            rank_word = dataload.MONTHLY_WORD
        else:
            dataload.logtime_print("Argument(s) error\n")
        log_context = 'Crawler set target to %s rank top' % rank_word
    elif ormode == 'r' or ormode == '2':
        if self.ir_mode == 1:
            dwm = dataload.logtime_input(
                'Select daily(1)/weekly(2) R18 ranking type: ')
        elif self.ir_mode == 2:
            dwm = self.rtn_rank_type
        if dwm == '1':
            req_url = dataload.DAILY_RANKING_R18_URL
            rank_word = dataload.DAILY_WORD
        elif dwm == '2':
            req_url = dataload.WEEKLY_RANKING_R18_URL
            rank_word = dataload.WEEKLY_WORD
        else:
            dataload.logtime_print("Argument(s) error\n")
        log_context = 'Crawler set target to %s r18 rank top' % rank_word
    else:
        dataload.logtime_print("Argument(s) error\n")
        log_context = None
    self.pvmx.logprowork(self.logpath, log_context)
    return req_url, ormode

def url_request_handler(self, target_url, post_data, timeout,
                        target_page_word, need_log, log_path):
    """Universal URL request format handler

    @@API that allows external calls
    :param target_url: target request url
    :param post_data: post way data
    :param timeout: request timeout, 30s is suggested
    :param need_log: True if a log is needed, then log_path is required
    :param target_page_word: target page symbol word
    :param log_path: log save path
    :return: raw request response
    """
    response = None
    try:
        response = self.opener.open(fullurl=target_url,
                                    data=post_data,
                                    timeout=timeout)
    except Exception as e:
        log_context = "Error Type: " + str(e)
        if need_log:
            self.logprowork(log_path, log_context)
        else:
            dataload.logtime_print(log_context)
    except KeyboardInterrupt:
        log_context = 'User interrupted the request, exit program'
        if need_log:
            self.logprowork(log_path, log_context)
        exit()
    # if the response failed, the crawler exits with error code -1
    if response is not None:
        if response.getcode() == dataload.HTTP_OK_CODE_200:
            log_context = target_page_word + ' response succeeded'
        else:
            log_context = (target_page_word +
                           ' response not ok, return code %d' %
                           response.getcode())
        if need_log:
            self.logprowork(log_path, log_context)
        else:
            dataload.logtime_print(log_context)
    else:
        log_context = target_page_word + ' response failed'
        if need_log:
            self.logprowork(log_path, log_context)
        else:
            dataload.logtime_print(log_context)
        exit(-1)
    return response

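# A minimal standalone sketch of the same request/status-check pattern,
# assuming only the standard library and network access; the URL is a
# hypothetical placeholder, and a real run goes through self.opener with
# login cookies rather than a bare urlopen().
import urllib.request
import urllib.error

def fetch(url, data=None, timeout=30):
    try:
        response = urllib.request.urlopen(url, data=data, timeout=timeout)
    except urllib.error.URLError as e:
        print("Error Type: " + str(e))
        return None
    if response.getcode() == 200:
        print("response succeeded")
        return response
    print("response not ok, return code %d" % response.getcode())
    return None

# fetch('https://example.com/')  # read() the returned object for the page source
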
def gather_essential_info(ormode, whole_nbr):
    """Get the requested image count from input

    If the user inputs a number larger than the whole number, the target
    count is set to the whole number
    Only interactive mode calls this function
    :param ormode: ranking top ordinary or r18 mode selection
    :param whole_nbr: whole ranking crawl count
    :return: crawl image count
    """
    # transfer the ASCII string to a number
    img_cnt = 0
    # choose ordinary artwork images
    if ormode == 'o' or ormode == '1':
        # input a string for the requested image number
        img_str = dataload.logtime_input(
            'Gather whole ordinary valid target %d, enter the count you want: '
            % whole_nbr)
    # choose R18 artwork images
    elif ormode == 'r' or ormode == '2':
        # input a string for the requested image number
        img_str = dataload.logtime_input(
            'Gather whole R18 valid target %d, enter the count you want: '
            % whole_nbr)
    # error input
    else:
        dataload.logtime_print("Argument(s) error\n")
        exit(-1)
    # if the user input is not a number, ask again
    while not img_str.isdigit():
        dataload.logtime_print(
            'Input error, your input content was not a decimal number')
        img_str = dataload.logtime_input(
            'Enter again(max is %d): ' % whole_nbr)
    # the input is now a number
    # if the user inputs a number larger than the limit, set it to the max
    img_cnt = int(img_str)
    if img_cnt > whole_nbr:
        img_cnt = whole_nbr
    elif img_cnt <= 0:
        dataload.logtime_print('What the f**k is wrong with you?')
        exit(-1)
    return img_cnt

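# A standalone sketch of the same validate-and-clamp logic, assuming only the
# standard library; clamp_count() is a hypothetical helper, not part of the
# crawler, and it raises instead of calling exit(-1).
def clamp_count(img_str, whole_nbr):
    if not img_str.isdigit():
        raise ValueError('input was not a decimal number')
    img_cnt = int(img_str)
    if img_cnt <= 0:
        raise ValueError('count must be positive')
    return min(img_cnt, whole_nbr)   # never exceed the available targets

print(clamp_count('120', 50))  # -> 50, clamped to the ranking size
print(clamp_count('30', 50))   # -> 30
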
def crawl_allpage_target(self):
    """Package all gathered urls

    :return: none
    """
    # calculate the required request count
    # each page holds at most ONE_PAGE_COMMIT (20181003: 48) images
    require_page_cnt = 0
    if self.max_cnt <= dataload.ONE_PAGE_COMMIT:
        require_page_cnt = 1
    else:
        require_page_cnt = int(self.max_cnt / dataload.ONE_PAGE_COMMIT)
        # remainder decision
        if self.max_cnt % dataload.ONE_PAGE_COMMIT != 0:
            require_page_cnt += 1
    # build the request url of each page
    iid_string_tail = ''
    page_url_array = []
    for ix in range(require_page_cnt):
        # tail number limit
        tmp_tail_nbr = dataload.ONE_PAGE_COMMIT * (ix + 1)
        if tmp_tail_nbr > self.max_cnt:
            tmp_tail_nbr = self.max_cnt
        for index in self.pure_idlist[(dataload.ONE_PAGE_COMMIT * ix):tmp_tail_nbr]:
            iid_string_tail += dataload.IDS_UNIT(index)
        one_page_request_url = dataload.ALLREPOINFO_URL(self.user_input_id,
                                                        iid_string_tail)
        iid_string_tail = ''  # clear the last cache
        page_url_array.append(one_page_request_url)
    # gather all data from the response xhr pages into a temp list
    tmp_receive_list = []
    for i in range(require_page_cnt):
        tmp_receive_list += self.crawl_onepage_data(i + 1, page_url_array[i])
    # handle the url strings
    repo_target_all_list = []
    for i in range(len(tmp_receive_list)):
        # transform title '\\uxxxx' escapes to unicode
        tmp_receive_list[i][1] = self.pvmx.unicode_escape(tmp_receive_list[i][1])
        # replace emoji strings
        tmp_receive_list[i][1] = self.pvmx.replace_emoji(tmp_receive_list[i][1])
        # build the original url without the image format
        tmp = tmp_receive_list[i][2]
        tmp = tmp.replace('\\', '')  # delete character '\'
        tmp_receive_list[i][2] = dataload.ORIGINAL_IMAGE_HEAD + tmp[50:] + '.png'
        # move the original item to the target list
        repo_target_all_list.append(tmp_receive_list[i])
        # use the page count number to build the remaining urls
        tmp_page_count_str = tmp_receive_list[i][3]
        if tmp_page_count_str.isdigit():
            index_page_count = int(tmp_page_count_str)
            if index_page_count != 1:
                # add the other page items into the list
                for px in range(index_page_count - 1):
                    insert_item = [tmp_receive_list[i][0],
                                   tmp_receive_list[i][1],
                                   tmp_receive_list[i][2][:-5] + str(px + 1) + '.png',
                                   tmp_receive_list[i][3]]
                    repo_target_all_list.append(insert_item)
        else:
            log_context = 'Page count process error!'
            self.pvmx.logprowork(self.logpath, log_context)
            exit(-1)
    del tmp_receive_list  # clear cache
    # collect the target count
    alive_targetcnt = len(repo_target_all_list)
    require_img_nbr = 0
    if self.ir_mode == 1:
        require_img_str = dataload.logtime_input(
            'Gather all repo %d, whole target(s): %d, enter the count you want: '
            % (self.max_cnt, alive_targetcnt))
        # if the user input is not a number, ask again
        while not require_img_str.isdigit():
            dataload.logtime_print(
                'Input error, your input content was not a decimal number')
            require_img_str = dataload.logtime_input(
                'Enter again(max is %d): ' % alive_targetcnt)
        require_img_nbr = int(require_img_str)
        # if the user inputs a number larger than the limit, set it to the max
        if require_img_nbr > alive_targetcnt:
            require_img_nbr = alive_targetcnt
        elif require_img_nbr <= 0:
            dataload.logtime_print('What the f**k is wrong with you?')
            exit(-1)
    # server mode directly catches all of the alive targets
    elif self.ir_mode == 2:
        require_img_nbr = alive_targetcnt
        dataload.logtime_print('Server mode auto crawl all of alive targets')
    # download image number limit
    for k, i in enumerate(repo_target_all_list[:require_img_nbr]):
        self.target_capture.append(i[2])                   # put the url into the target capture list
        self.basepages.append(dataload.BASEPAGE_URL + i[0])  # build the basepage url
    # display author info
    log_context = ('Illustrator: ' + self.author_name + ' id: ' +
                   self.user_input_id + ' require image(s): ' +
                   str(require_img_nbr) + ', target table:')
    self.pvmx.logprowork(self.logpath, log_context)
    # use prettytable to build a table, then save and print the info list
    image_info_table = PrettyTable(
        ["ImageNumber", "ImageID", "ImageTitle", "ImagePageName"])
    for k, i in enumerate(repo_target_all_list[:require_img_nbr]):
        image_info_table.add_row([(k + 1), i[0], i[1], i[2][57:-4]])
    # save with str format and no time word
    self.pvmx.logprowork(self.logpath, str(image_info_table), 'N')
    del repo_target_all_list  # clear cache

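# A standalone sketch of the paging arithmetic above, assuming only the
# standard library; page_limit stands in for dataload.ONE_PAGE_COMMIT and the
# id list is a hypothetical placeholder. Ceil division replaces the
# divide-then-check-remainder sequence.
import math

def split_pages(id_list, page_limit=48):
    page_cnt = max(1, math.ceil(len(id_list) / page_limit))
    return [id_list[ix * page_limit:(ix + 1) * page_limit]
            for ix in range(page_cnt)]

demo_ids = [str(n) for n in range(100)]
pages = split_pages(demo_ids)
print(len(pages), [len(p) for p in pages])  # 3 pages: 48, 48, 4
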
def gather_preloadinfo(self):
    """The crawler needs to know how many images you want

    This function also gets the author name based on the author id
    :return: none
    """
    # request all of one illustrator's artworks
    response = self.pvmx.url_request_handler(
        target_url=dataload.AJAX_ALL_URL(self.user_input_id),
        post_data=self.pvmx.login_bias[2],
        timeout=30,
        target_page_word='Ajaxpage',
        need_log=True,
        log_path=self.logpath)
    # match the illustration id list
    web_src = response.read().decode("UTF-8", "ignore")
    ajax_idlist_pattern = re.compile(dataload.AJAX_ALL_IDLIST_REGEX, re.S)
    ajax_idlist = re.findall(ajax_idlist_pattern, web_src)
    # the id list result may include some garbage, use the number regex to get a pure result
    number_pattern = re.compile(dataload.NUMBER_REGEX, re.S)
    for index in ajax_idlist:
        one_pure_id = re.findall(number_pattern, index)
        if one_pure_id:
            self.pure_idlist.append(one_pure_id[0])
        else:
            # very rare error, only observed at this address:
            # https://www.pixiv.net/member_illust.php?id=15115322
            log_context = 'Get ajax page valid info failed, exit'
            self.pvmx.logprowork(self.logpath, log_context)
            exit(-1)
    # use the quick-sort algorithm to handle the id numbers
    # descending order sort
    pure_idlist_nbr = []
    for index in self.pure_idlist:
        pure_idlist_nbr.append(int(index))  # string to integer number
    self.pvmx.quick_sort(pure_idlist_nbr, 0, len(pure_idlist_nbr) - 1)
    pure_idlist_nbr.reverse()   # reverse to descending order
    self.pure_idlist.clear()    # clear the origin list
    for index in pure_idlist_nbr:
        self.pure_idlist.append(str(index))
    del pure_idlist_nbr         # clear the number cache
    self.max_cnt = len(self.pure_idlist)
    # get the author name from the member main page
    response = self.pvmx.url_request_handler(
        target_url=dataload.MEMBER_ILLUST_URL + self.user_input_id,
        post_data=self.pvmx.login_bias[2],
        timeout=30,
        target_page_word='Mainpage',
        need_log=True,
        log_path=self.logpath)
    # match the illustrator name
    web_src = response.read().decode("UTF-8", "ignore")
    illust_name_pattern = re.compile(dataload.ILLUST_NAME_REGEX, re.S)
    author_info = re.findall(illust_name_pattern, web_src)
    # if the login failed, the regex parsing result will be an empty list
    if len(author_info) == 0:
        dataload.logtime_print(
            "Regex parsing result error, no author info, exit")
        exit()
    else:
        self.author_name = author_info[0]

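# A standalone sketch of the descending id ordering above, assuming only the
# standard library; the id strings are hypothetical. sorted() with key=int
# and reverse=True yields the same ordering as the quick_sort() plus
# reverse() pair used in the crawler.
demo_ids = ['70000001', '69990000', '70012345', '100']
demo_ids_sorted = sorted(demo_ids, key=int, reverse=True)
print(demo_ids_sorted)  # ['70012345', '70000001', '69990000', '100']
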
def _login_preload(aes_file_path):
    """Get user input login info and store it in an AES file

    If the project directory has no such file, you need to hand-input the
    login info, then the program creates a new file to store the AES
    encrypted info
    This method uses pycrypto, which must be installed and imported
    :param aes_file_path: .aes_crypto_login.ini file path
    :return: username, password, get-way data
    """
    is_aes_file_existed = os.path.exists(aes_file_path)
    if is_aes_file_existed:
        # stable row-wise read to get the username and password
        # read the binary file content into a list
        read_aes_file = open(aes_file_path, 'rb+')
        readline_cache = read_aes_file.readlines()  # all-line list
        read_aes_file.close()
        read_aes_iv_param_raw = readline_cache[0]   # row 1 is the AES IV param
        read_user_mailbox_raw = readline_cache[1]   # row 2 is the username
        read_user_passwd_raw = readline_cache[2]    # row 3 is the password
        # cut the last char (b'\n')
        read_aes_iv_param_raw = read_aes_iv_param_raw[:-1]
        read_user_mailbox_raw = read_user_mailbox_raw[:-1]
        read_user_passwd_raw = read_user_passwd_raw[:-1]
        # decrypt the stored values back to strings
        username_aes_decrypt_cipher = AES.new(dataload.AES_SECRET_KEY,
                                              AES.MODE_CFB,
                                              read_aes_iv_param_raw)
        username = str(
            username_aes_decrypt_cipher.decrypt(
                read_user_mailbox_raw[AES.block_size:]), 'UTF-8')
        password_aes_decrypt_cipher = AES.new(dataload.AES_SECRET_KEY,
                                              AES.MODE_CFB,
                                              read_aes_iv_param_raw)
        passwd = str(
            password_aes_decrypt_cipher.decrypt(
                read_user_passwd_raw[AES.block_size:]), 'UTF-8')
        # check the username and password
        check = dataload.logtime_input(
            "Read user login information configuration ok, check this: \n"
            "[-> Username] %s\n[-> Password] %s\n"
            "Is that correct? (Y/N): " % (username, passwd))
        # if the user judges the info to be wrong, delete the old AES file and record new info
        if check == 'N' or check == 'n':
            os.remove(aes_file_path)  # delete the old AES file
            # hand-enter the login information again
            dataload.logtime_print(
                "Well, you need to hand-input your login data: ")
            username = dataload.logtime_input(
                'Enter your pixiv id(mailbox), must be a R18: ').encode('utf-8')
            passwd = getpass.getpass(
                dataload.realtime_logword(dataload.base_time) +
                'Enter your account password: ').encode('utf-8')
            generate_aes_iv_param = Random.new().read(
                AES.block_size)  # generate a random AES IV param
            username_cipher = AES.new(dataload.AES_SECRET_KEY, AES.MODE_CFB,
                                      generate_aes_iv_param)
            username_encrypto = generate_aes_iv_param + username_cipher.encrypt(
                username)
            passwd_cipher = AES.new(dataload.AES_SECRET_KEY, AES.MODE_CFB,
                                    generate_aes_iv_param)
            passwd_encrypto = generate_aes_iv_param + passwd_cipher.encrypt(
                passwd)
            # create a new AES file and rewrite it
            write_aes_file = open(aes_file_path, 'wb')
            # write the binary values to the file with b'\n' to wrap
            write_aes_file.write(generate_aes_iv_param + b'\n')  # row 1 is the IV param
            write_aes_file.write(username_encrypto + b'\n')      # row 2 is the username
            write_aes_file.write(passwd_encrypto + b'\n')        # row 3 is the password
            write_aes_file.close()
        # the read info is correct, jump out here
        else:
            pass
    # if there is no AES file, create a new one and write the encrypted values into it
    else:
        dataload.logtime_print(
            "Create new AES encrypt file to storage your username and password: ")
        username = dataload.logtime_input(
            'Enter your pixiv id(mailbox), must be a R18: ').encode('utf-8')
        passwd = getpass.getpass(
            dataload.realtime_logword(dataload.base_time) +
            'Enter your account password: ').encode('utf-8')
        generate_aes_iv_param = Random.new().read(
            AES.block_size)  # generate a random AES IV param
        username_cipher = AES.new(dataload.AES_SECRET_KEY, AES.MODE_CFB,
                                  generate_aes_iv_param)
        username_encrypto = generate_aes_iv_param + username_cipher.encrypt(
            username)
        passwd_cipher = AES.new(dataload.AES_SECRET_KEY, AES.MODE_CFB,
                                generate_aes_iv_param)
        passwd_encrypto = generate_aes_iv_param + passwd_cipher.encrypt(
            passwd)
        # create a new AES file, set write binary bytes mode
        write_aes_file = open(aes_file_path, 'wb')
        # write the binary values to the file with b'\n' to wrap
        write_aes_file.write(generate_aes_iv_param + b'\n')  # row 1 is the IV param
        write_aes_file.write(username_encrypto + b'\n')      # row 2 is the username
        write_aes_file.write(passwd_encrypto + b'\n')        # row 3 is the password
        write_aes_file.close()
    # build the data string
    getway_register = [('user', username), ('pass', passwd)]
    getway_data = urllib.parse.urlencode(getway_register).encode(
        encoding='UTF8')
    return username, passwd, getway_data  # return the 3 login elements

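# A minimal round-trip sketch of the AES-CFB storage scheme above, assuming
# the pycryptodome (or pycrypto) package is installed; DEMO_KEY and the
# credential bytes are hypothetical placeholders, while the real code uses
# dataload.AES_SECRET_KEY.
from Crypto.Cipher import AES
from Crypto import Random

DEMO_KEY = b'0123456789abcdef'               # 16-byte AES key placeholder
iv = Random.new().read(AES.block_size)       # random IV, stored as file row 1
secret = b'user@example.com'

# encrypt: the IV is prepended so each stored row is self-describing
encrypted_row = iv + AES.new(DEMO_KEY, AES.MODE_CFB, iv).encrypt(secret)

# decrypt: strip the first block_size bytes (the IV) before decrypting
recovered = AES.new(DEMO_KEY, AES.MODE_CFB, iv).decrypt(
    encrypted_row[AES.block_size:])
print(recovered.decode('utf-8'))             # -> user@example.com
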
def main():
    """main() function

    Get user input arguments and launch the mode function
    :return: none
    """
    print(PixivAPILib.__doc__)
    # ask whether the program should continue
    ask_res = dataload.logtime_input('%s launch, continue? (Y/N): ' %
                                     dataload.PROJECT_NAME)
    if ask_res == 'N' or ask_res == 'No' or ask_res == 'n':
        dataload.logtime_print("User exit program\n")
        exit(0)
    # website id and password are required
    ask_res = dataload.logtime_input(
        'Crawler will use your Pixiv-ID and password to login to the website, agree? (Y/N): ')
    if ask_res == 'N' or ask_res == 'No' or ask_res == 'n':
        dataload.logtime_print("Without an ID and password the crawler cannot work, exit")
        exit(0)
    api_instance = PixivAPILib()     # instantiate the class into an object
    api_instance.camouflage_login()  # crawler simulated login
    # multiple task cycles
    while True:
        mode = dataload.logtime_input('Login finished, select mode: ')
        # ranking top N mode
        if mode == 'rtn' or mode == '1':
            dataload.logtime_print('Mode: [Ranking Top N]')
            rtn_instance = rtn(dataload.RANK_DIR, dataload.LOG_PATH,
                               dataload.HTML_PATH, api_instance)
            rtn_instance.start()
        # illustrator repository all mode
        elif mode == 'ira' or mode == '2':
            dataload.logtime_print('Mode: [Illustrator Repository All]')
            ira_instance = ira(dataload.REPO_DIR, dataload.LOG_NAME,
                               dataload.HTML_NAME, api_instance)
            ira_instance.start()
        # help page
        elif mode == 'help' or mode == '3':
            print(PixivAPILib.__doc__)
        # user normal exit program
        elif mode == 'exit' or mode == '4':
            dataload.logtime_print("User exit program")
            dataload.crawler_logo()  # print the logo on exit
            exit(0)
        # input parameter error, go into the next cycle
        else:
            dataload.logtime_print("Argument(s) error")

def target_confirm(self):
    """Input option and confirm the target

    :return: request mainpage url, mode
    """
    rank_word, req_url = None, None
    if self.ir_mode == 1:
        log_context = 'Gather ranking list======>'
        self.pvmx.logprowork(self.logpath, log_context)
        # select ranking R18 or not
        ormode = dataload.logtime_input(
            'Select ranking type, ordinary(o|1) or r18(r|2): ')
        mf_word = dataload.logtime_input(
            'Select sex favor, normal(n|0) or male(m|1) or female(f|2): ')
    elif self.ir_mode == 2:
        ormode = self.rtn_r18_arg
        mf_word = self.rtn_mf_word
    if ormode == 'o' or ormode == '1':
        if self.ir_mode == 1:
            dwm = dataload.logtime_input(
                'Select daily(1) | weekly(2) | monthly(3) ordinary ranking type: ')
        elif self.ir_mode == 2:
            dwm = self.rtn_rank_type
        if dwm == '1':
            if mf_word == '0' or mf_word == 'n':
                req_url = dataload.DAILY_RANKING_URL
                rank_word = dataload.DAILY_WORD
            # with male or female chosen, the rank type can only be set to daily
            elif mf_word == '1' or mf_word == 'm':
                req_url = dataload.DAILY_MALE_RANKING_URL
                rank_word = dataload.MALE_WORD
            elif mf_word == '2' or mf_word == 'f':
                req_url = dataload.DAILY_FEMALE_RANKING_URL
                rank_word = dataload.FEMALE_WORD
            else:
                dataload.logtime_print("Argument(s) error\n")
        elif dwm == '2':
            req_url = dataload.WEEKLY_RANKING_URL
            rank_word = dataload.WEEKLY_WORD
        elif dwm == '3':
            req_url = dataload.MONTHLY_RANKING_URL
            rank_word = dataload.MONTHLY_WORD
        else:
            dataload.logtime_print("Argument(s) error\n")
        log_context = 'Crawler set target to %s rank top' % rank_word
    elif ormode == 'r' or ormode == '2':
        if self.ir_mode == 1:
            dwm = dataload.logtime_input(
                'Select daily(1)/weekly(2) R18 ranking type: ')
        elif self.ir_mode == 2:
            dwm = self.rtn_rank_type
        if dwm == '1':
            if mf_word == '0' or mf_word == 'n':
                req_url = dataload.DAILY_RANKING_R18_URL
                rank_word = dataload.DAILY_WORD
            # with male or female chosen, the rank type can only be set to daily
            elif mf_word == '1' or mf_word == 'm':
                req_url = dataload.DAILY_MALE_RANKING_R18_URL
                rank_word = dataload.MALE_WORD
            elif mf_word == '2' or mf_word == 'f':
                req_url = dataload.DAILY_FEMALE_RANKING_R18_URL
                rank_word = dataload.FEMALE_WORD
            else:
                dataload.logtime_print("Argument(s) error\n")
        elif dwm == '2':
            req_url = dataload.WEEKLY_RANKING_R18_URL
            rank_word = dataload.WEEKLY_WORD
        else:
            dataload.logtime_print("Argument(s) error\n")
        log_context = 'Crawler set target to %s r18 rank top' % rank_word
    else:
        dataload.logtime_print("Argument(s) error\n")
        log_context = None
    self.pvmx.logprowork(self.logpath, log_context)
    return req_url, ormode

def main():
    """main() function

    Get user input arguments and launch the mode function
    :return: none
    """
    print(PixivAPILib.__doc__)
    # interactive mode or server mode, default is interactive mode (1)
    mode_interactive_server = 1
    # judge the count of command line arguments
    # with no external arguments, enter interactive mode
    if len(sys.argv) == 1:
        mode_interactive_server = 1
        # ask whether the program should continue
        ask_res = dataload.logtime_input('%s launch, continue? (Y/N): ' %
                                         dataload.PROJECT_NAME)
        if ask_res == 'N' or ask_res == 'No' or ask_res == 'n':
            dataload.logtime_print("User exit program\n")
            exit(0)
        # website id and password are required
        ask_res = dataload.logtime_input(
            'Crawler will use your Pixiv-ID and password to login to the website, agree? (Y/N): ')
        if ask_res == 'N' or ask_res == 'No' or ask_res == 'n':
            dataload.logtime_print("Without an ID and password the crawler cannot work, exit")
            exit(0)
        api_instance = PixivAPILib(mode_interactive_server)  # instantiate the class into an object
        api_instance.camouflage_login()                      # crawler simulated login
        # multiple task cycles
        while True:
            mode = dataload.logtime_input('Login finished, select mode: ')
            # ranking top N mode
            if mode == 'rtn' or mode == '1':
                dataload.logtime_print('Mode: [Ranking Top N]')
                rtn_instance = rtn(dataload.RANK_DIR, dataload.LOG_PATH,
                                   dataload.HTML_PATH, api_instance,
                                   mode_interactive_server)
                rtn_instance.start()
            # illustrator repository all mode
            elif mode == 'ira' or mode == '2':
                dataload.logtime_print('Mode: [Illustrator Repository All]')
                ira_instance = ira(dataload.REPO_DIR, dataload.LOG_NAME,
                                   dataload.HTML_NAME, api_instance,
                                   mode_interactive_server)
                ira_instance.start()
            # help page
            elif mode == 'help' or mode == '3':
                print(PixivAPILib.__doc__)
            # user normal exit program
            elif mode == 'exit' or mode == '4':
                dataload.logtime_print("User exit program")
                dataload.crawler_logo()  # print the logo on exit
                exit(0)
            # input parameter error, go into the next cycle
            else:
                dataload.logtime_print("Argument(s) error")
    else:
        mode_interactive_server = 2
        # pass the arguments to variables
        # long options that take a value need a trailing '=' for getopt
        opts, args = getopt.getopt(sys.argv[1:], "hm:r:l:s:i:",
                                   ["help", "mode=", "R18=", "list=", "sex=", "id="])
        catch_mode = '1'
        rtn_r18_opt = '1'
        rtn_list_type = '1'
        rtn_mf_word = ''
        ira_illust_id = ''
        for opt, value in opts:
            if opt in ("-m", "--mode"):
                catch_mode = value
            elif opt in ("-r", "--R18"):
                rtn_r18_opt = value
            elif opt in ("-l", "--list"):
                rtn_list_type = value
            elif opt in ("-s", "--sex"):
                rtn_mf_word = value
            elif opt in ("-i", "--id"):
                ira_illust_id = value
            elif opt in ("-h", "--help"):
                print(PixivAPILib.__doc__)
                exit(0)
        api_instance = PixivAPILib(mode_interactive_server)  # instantiate the class into an object
        api_instance.camouflage_login()                       # crawler simulated login
        # ranking top N mode
        if catch_mode == '1':
            dataload.logtime_print('Mode: [Ranking Top N]')
            rtn_instance = rtn(dataload.RANK_DIR, dataload.LOG_PATH,
                               dataload.HTML_PATH, api_instance,
                               mode_interactive_server, rtn_r18_opt,
                               rtn_list_type, rtn_mf_word)
            rtn_instance.start()
        # illustrator repository all mode
        elif catch_mode == '2':
            dataload.logtime_print('Mode: [Illustrator Repository All]')
            ira_instance = ira(dataload.REPO_DIR, dataload.LOG_NAME,
                               dataload.HTML_NAME, api_instance,
                               mode_interactive_server, ira_illust_id)
            ira_instance.start()
        # help page
        elif catch_mode == 'help' or catch_mode == '3':
            print(PixivAPILib.__doc__)

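# Hypothetical server-mode invocations of main() above, assuming the entry
# script is named pixiv_crawler.py (the real script name may differ); the
# option letters map to the getopt() handling shown in main(), and the
# illustrator id is a placeholder.
#
#   python pixiv_crawler.py -m 1 -r o -l 1 -s n   # ranking top N, ordinary daily list
#   python pixiv_crawler.py -m 1 -r r -l 1 -s m   # R18 daily male ranking
#   python pixiv_crawler.py -m 2 -i 123456        # crawl all works of one illustrator id
#   python pixiv_crawler.py -h                    # print the help page and exit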