def rtn_gather_essential_info(page_opt, whole_nbr):
    """Ask the user how many ranking images should be crawled.

    Only interactive mode calls this function. The prompt is repeated until
    the answer is a decimal number; an answer larger than ``whole_nbr`` is
    capped to ``whole_nbr``.

    :param page_opt:    selected ranking page type (ordinary, r18 or r18g)
    :param whole_nbr:   count of valid targets available on the ranking
    :return:            image count to crawl, or dl.PUB_E_PARAM_FAIL when
                        page_opt is unknown or the entered count is not positive
    """
    if page_opt == dl.PAGE_ORDINARY:
        label = 'ordinary'
    elif page_opt == dl.PAGE_R18:
        label = 'r18'
    elif page_opt == dl.PAGE_R18G:
        label = 'r18g'
    else:
        dl.nolog_raise_arguerr()
        return dl.PUB_E_PARAM_FAIL

    img_str = dl.LT_INPUT(dl.HL_CY('crawl %s valid target %d, enter you want: ' % (label, whole_nbr)))
    # isdecimal() only accepts characters that int() can parse; isdigit()
    # also accepts e.g. superscript digits, which make int() raise ValueError
    while not img_str.isdecimal():
        img_str = dl.LT_INPUT(dl.HL_CY('input error, enter again(max is %d): ' % whole_nbr))

    img_cnt = int(img_str)
    if img_cnt <= 0:
        dl.LT_PRINT(dl.BR_CB('what the f**k is wrong with you?'))
        return dl.PUB_E_PARAM_FAIL

    # never ask for more than the ranking can provide
    return min(img_cnt, whole_nbr)
def __init__(self, workdir, log_name, html_name, wkv_cw_api, ir_mode, ext_id=''):
    """Prepare one illustrator-repository crawl task.

    In interactive mode the illustrator id is asked from the user; in server
    mode ``ext_id`` is used. For any other mode the argument error is
    reported through dl.nolog_raise_arguerr() and ``ext_id`` is used as well,
    so that the instance is always fully initialised.

    :param workdir:     work directory prefix
    :param log_name:    log name, appended to the task work directory
    :param html_name:   html name, appended to the task work directory
    :param wkv_cw_api:  API library class instance (WkvCwApi)
    :param ir_mode:     interactive mode or server mode
    :param ext_id:      external illustrator id
    """
    if ir_mode == dl.MODE_INTERACTIVE:
        self.user_input_id = dl.LT_INPUT(dl.HL_CY('target crawl illustrator pixiv-id: '))
    elif ir_mode == dl.MODE_SERVER:
        self.user_input_id = ext_id
    else:
        # without this branch the attribute stayed unset and the path
        # building below failed with AttributeError
        dl.nolog_raise_arguerr()
        self.user_input_id = ext_id

    self.workdir = workdir + 'illustrepo_' + self.user_input_id
    self.logpath = self.workdir + log_name
    self.htmlpath = self.workdir + html_name

    # the shared API instance is stored directly; no throwaway WkvCwApi is
    # constructed just to hint the attribute type
    self.wkv_cw_api = wkv_cw_api  # type: WkvCwApi
    self.ir_mode = ir_mode

    # state filled in by the crawl steps of this class
    self.ira_author_name = None
    self.ira_max_cnt = 0
    self.ira_pure_idlist = []
    self.ira_target_capture = []
    self.ira_basepages = []
def _login_preload(self, aes_file_path):
    """Load the login info from the AES file, or ask the user for it.

    When ``aes_file_path`` exists, its first three lines are read as the IV,
    the encrypted user mail and the encrypted password (the last byte of each
    line is dropped), and both values are decrypted with AES in CFB mode using
    dl.AES_SECRET_KEY. In interactive mode the decrypted values are shown and
    the user is asked to confirm them; on 'N'/'n' the file is removed and the
    login data is typed in by hand.

    At the end the method stores self.username, self.passwd and
    self.getway_data (the URL-encoded, UTF8-encoded login pairs).

    NOTE(review): two spans of this method are missing from the file as
    checked in; they are marked below. The branch for a non-existing file and
    the code writing the AES file are presumably inside those spans and must
    be restored from version control.

    :param aes_file_path:   .aes_crypto_login.ini file path
    :return:                none
    """
    if os.path.exists(aes_file_path):
        # read all rows of the stored file: IV, user mail, password
        read_aes_file = open(aes_file_path, 'rb+')
        readline_cache = read_aes_file.readlines()  # all lines as a list
        read_aes_file.close()

        # strip the trailing '\n' of every stored row
        aes_info = {
            'iv_param': readline_cache[0][:-1],
            'user_mail': readline_cache[1][:-1],
            'passwd': readline_cache[2][:-1]
        }

        # decrypt the stored values back to strings; the first
        # AES.block_size bytes of each ciphertext are skipped
        # NOTE(review): both ciphers are created with the same key and IV
        username_aes_decrypt_cipher = AES.new(dl.AES_SECRET_KEY, AES.MODE_CFB, aes_info['iv_param'])
        username = str(
            username_aes_decrypt_cipher.decrypt(
                aes_info['user_mail'][AES.block_size:]), 'UTF-8')
        password_aes_decrypt_cipher = AES.new(dl.AES_SECRET_KEY, AES.MODE_CFB, aes_info['iv_param'])
        passwd = str(
            password_aes_decrypt_cipher.decrypt(
                aes_info['passwd'][AES.block_size:]), 'UTF-8')

        if self.ir_mode == dl.MODE_INTERACTIVE:
            # NOTE(review): the decrypted password is part of this prompt in clear text
            check = dl.LT_INPUT(
                dl.HL_CY("get user account info ok, check: \n"
                         "[*username] %s\n[*password] %s\n"
                         "Is that correct? (Y/N): " % (username, passwd)))

            # if the user rejects the info, delete the old AES file and record new info
            if check == 'N' or check == 'n':
                os.remove(aes_file_path)  # delete old AES file
                # temporarily enter login information
                dl.LT_PRINT(
                    dl.BY_CB(
                        "Well, you need hand-input your login data: "))
                username = dl.LT_INPUT(
                    dl.HL_CY(
                        'enter your pixiv id(mailbox), must be a R18: '))
                # NOTE(review): source text is missing between the two string
                # literals on the next statement ('******' marks the gap)
                passwd = getpass.getpass(
                    dl.realtime_logword(dl.base_time) +
                    dl.HL_CY('enter your account password: '******'enter your pixiv id(mailbox), must be a R18: '))
    # NOTE(review): the nesting of the statement below cannot be recovered
    # from this file; a second span is missing at its '******' marker
    # (presumably it contained the start of the getway_register list)
    passwd = getpass.getpass(
        dl.realtime_logword(dl.base_time) +
        dl.HL_CY('enter your account password: '******'user', username), ('pass', passwd)]
    # URL-encode the login pairs and encode them as UTF8 bytes
    getway_data = urllib.parse.urlencode(getway_register).encode(
        encoding='UTF8')

    # keep the login info on the instance
    self.username = username
    self.passwd = passwd
    self.getway_data = getway_data
def rtn_target_confirm(self):
    """Select the ranking page to crawl and store its request url.

    Interactive mode asks the user for every option; server mode takes them
    from self.rtn_r18_arg, self.rtn_sex_opt and self.rtn_rank_type. The sex
    option is only looked at for daily rankings, and for the r18g page a
    single url is used without asking for daily/weekly/monthly.

    On success the selected url is logged and stored in self.rtn_req_url and
    the page option is stored in self.page_opt.

    :return: dl.PUB_E_OK on success, dl.PUB_E_PARAM_FAIL for an unknown mode
             or option
    """
    def first_match(key, table):
        """Return the value paired with the first entry equal to key, else None."""
        for candidate, value in table:
            if key == candidate:
                return value
        return None

    interactive = self.ir_mode == dl.MODE_INTERACTIVE
    if interactive:
        page_opt = dl.LT_INPUT(dl.HL_CY('select ranking type, ordinary(1) | r18(2) | r18g(3): '))
        sex_opt = dl.LT_INPUT(dl.HL_CY('select sex favor, normal(0) | male(1) | female(2): '))
    elif self.ir_mode == dl.MODE_SERVER:
        page_opt = self.rtn_r18_arg
        sex_opt = self.rtn_sex_opt
    else:
        dl.nolog_raise_arguerr()
        return dl.PUB_E_PARAM_FAIL

    # per page type: the prompt for the ranking period, the daily urls by
    # sex option and the urls of the remaining periods
    if page_opt == dl.PAGE_ORDINARY:
        dwm_prompt = 'select daily(1) | weekly(2) | monthly(3) ordinary ranking type: '
        daily_by_sex = (
            (dl.SEX_NORMAL, dl.RANK_DAILY_URL),
            (dl.SEX_MALE, dl.RANK_DAILY_MALE_URL),
            (dl.SEX_FEMALE, dl.RANK_DAILY_FEMALE_URL),
        )
        other_periods = (
            (dl.RANK_WEEKLY, dl.RANK_WEEKLY_URL),
            (dl.RANK_MONTHLY, dl.RANK_MONTHLY_URL),
        )
    elif page_opt == dl.PAGE_R18:
        dwm_prompt = 'select daily(1)/weekly(2) R18 ranking type: '
        daily_by_sex = (
            (dl.SEX_NORMAL, dl.RANK_DAILY_R18_URL),
            (dl.SEX_MALE, dl.RANK_DAILY_MALE_R18_URL),
            (dl.SEX_FEMALE, dl.RANK_DAILY_FEMALE_R18_URL),
        )
        other_periods = (
            (dl.RANK_WEEKLY, dl.RANK_WEEKLY_R18_URL),
        )
    elif page_opt == dl.PAGE_R18G:
        dwm_prompt = None
    else:
        dl.nolog_raise_arguerr()
        return dl.PUB_E_PARAM_FAIL

    if dwm_prompt is None:
        req_url = dl.RANK_R18G_URL
        dl.LT_PRINT(dl.BR_CB('warning: you choose the r18g rank, hope you know what it means'))
    else:
        # the mode was validated above, so it is either interactive or server
        if interactive:
            dwm_opt = dl.LT_INPUT(dl.HL_CY(dwm_prompt))
        else:
            dwm_opt = self.rtn_rank_type

        if dwm_opt == dl.RANK_DAILY:
            req_url = first_match(sex_opt, daily_by_sex)
        else:
            req_url = first_match(dwm_opt, other_periods)
        if req_url is None:
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL

    log_content = dl.BY_CB('base select option, set rank target url: [%s]' % req_url)
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
    self.rtn_req_url = req_url
    self.page_opt = page_opt

    return dl.PUB_E_OK
def ira_crawl_allpage_target(self):
    """Gather the original image urls of the illustrator's works.

    Steps:
    1. split self.ira_pure_idlist into sub pages of dl.ONE_PAGE_COMMIT ids
       and build one info url per sub page,
    2. fetch every sub page with self.ira_crawl_subpage_data(),
    3. rebuild the original image url of every work and add one entry per
       additional page of multi-page works,
    4. decide how many targets to take (asked in interactive mode, all of
       them in server mode),
    5. fill self.ira_target_capture and self.ira_basepages and log the
       target table.

    :return: dl.PUB_E_OK on success, dl.PUB_E_FAIL when a sub page request or
             a page count is invalid, dl.PUB_E_PARAM_FAIL when the user enters
             a non-positive count
    """
    page_size = dl.ONE_PAGE_COMMIT
    # ceiling division in pure integers; at least one page is always requested
    require_page_cnt = max(1, -(-self.ira_max_cnt // page_size))

    # build the json data url of every sub page
    subpage_url_list = []
    for ix in range(require_page_cnt):
        head = page_size * ix
        tail = min(page_size * (ix + 1), self.ira_max_cnt)
        iid_string_tail = ''.join(
            dl.IDS_UNIT(iid) for iid in self.ira_pure_idlist[head:tail])
        subpage_url_list.append(
            dl.ALLREPOINFO_URL(self.user_input_id, iid_string_tail, 1 if ix == 0 else 0))

    # get all data from the response xhr pages into a temp list
    tmp_receive_list = []
    for page_no, subpage_url in enumerate(subpage_url_list, start=1):
        tmp_ret = self.ira_crawl_subpage_data(page_no, subpage_url)
        if not isinstance(tmp_ret, list):
            return dl.PUB_E_FAIL
        tmp_receive_list += tmp_ret

    repo_target_all_list = []
    for item in tmp_receive_list:
        # item layout as used below: [0] image id, [1] title, [2] url, [3] page count string
        item[1] = dl.EMOJI_REPLACE(dl.UNICODE_ESCAPE(item[1]))

        # build the first original url from the received url
        received_url = item[2].replace('\\', '')
        item[2] = dl.ORIGINAL_IMAGE_HEAD + received_url[-39:-7] + '.png'
        repo_target_all_list.append(item)

        page_count_str = item[3]
        if not page_count_str.isdigit():
            log_content = dl.BR_CB('page count process error')
            self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
            return dl.PUB_E_FAIL

        # add the other pages of a multi-page work. The [:-5] stem drops a
        # one-digit page index plus '.png'; the entry appended above is
        # presumably page 0, so start at 1 - starting at 0 listed (and
        # downloaded) that page twice
        url_stem = item[2][:-5]
        for px in range(1, int(page_count_str)):
            repo_target_all_list.append(
                [item[0], item[1], url_stem + str(px) + '.png', item[3]])

    alive_target_cnt = len(repo_target_all_list)
    require_img_nbr = 0
    if self.ir_mode == dl.MODE_INTERACTIVE:
        require_img_str = dl.LT_INPUT(dl.HL_CY('crawl all repo %d, whole target(s): %d, enter you want count: '
                                               % (self.ira_max_cnt, alive_target_cnt)))
        # isdecimal() only accepts characters int() can parse (isdigit()
        # also lets e.g. superscript digits through)
        while not require_img_str.isdecimal():
            dl.LT_PRINT(dl.BR_CB('input error, your input content was not a decimal number'))
            require_img_str = dl.LT_INPUT(dl.HL_CY('enter again(max is %d): ' % alive_target_cnt))
        require_img_nbr = int(require_img_str)
        if require_img_nbr <= 0:
            dl.LT_PRINT(dl.BR_CB('what the f**k is wrong with you?'))
            return dl.PUB_E_PARAM_FAIL
        require_img_nbr = min(require_img_nbr, alive_target_cnt)
    elif self.ir_mode == dl.MODE_SERVER:
        require_img_nbr = alive_target_cnt
        dl.LT_PRINT(dl.BY_CB('server mode auto crawl all of alive targets'))
    # any other mode keeps require_img_nbr at 0, so nothing is selected

    selected_targets = repo_target_all_list[:require_img_nbr]
    for target in selected_targets:
        self.ira_target_capture.append(target[2])
        self.ira_basepages.append(dl.BASEPAGE_URL(target[0]))

    log_content = 'illustrator [%s] id [%s], require image(s): %d, target table:' \
                  % (self.ira_author_name, self.user_input_id, require_img_nbr)
    self.wkv_cw_api.wca_logprowork(self.logpath, log_content)

    image_info_table = PrettyTable(["ImageNumber", "ImageID", "ImageTitle", "ImagePageName"])
    for number, target in enumerate(selected_targets, start=1):
        image_info_table.add_row([number, target[0], target[1], dl.FROM_URL_GET_IMG_NAME(target[2])])

    # dumping the table is best effort: titles may contain characters
    # (emoji) the log writer cannot handle, which must not abort the crawl
    try:
        self.wkv_cw_api.wca_logprowork(self.logpath, str(image_info_table), False)
    except Exception as e:
        dl.LT_PRINT(dl.BR_CB('error: %s, dump prettytable interrupt' % str(e)))

    return dl.PUB_E_OK
def main():
    """Main logic: read the user's arguments and launch the selected mode.

    Without command line arguments the program runs in interactive mode and
    keeps asking for a mode until the user selects exit. With arguments it
    runs in server mode: the options are parsed, the selected mode runs once
    and the program exits.

    Server mode options (short and long form both take a value, except help):
        -h/--help, -m/--mode, -r/--R18, -l/--list, -s/--sex, -i/--id

    :return: none
    """
    select_option = dl.SELECT_RTN
    rtn_page_opt = dl.PAGE_ORDINARY
    rtn_rank_opt = dl.RANK_DAILY
    rtn_sex_opt = dl.SEX_NORMAL
    ira_illust_id_list = []

    print(dl.HL_CR(WkvCwApi.__doc__))
    mode_interactive_server = dl.MODE_INTERACTIVE if len(sys.argv) == 1 else dl.MODE_SERVER
    api_instance = WkvCwApi(mode_interactive_server)
    api_instance.wca_camouflage_login()

    while True:
        if mode_interactive_server == dl.MODE_INTERACTIVE:
            select_option = dl.LT_INPUT(dl.HL_CY('login completed, select mode: '))
        else:
            try:
                # long options that take a value need the trailing '=',
                # otherwise getopt treats them as flags without argument
                opts, _ = getopt.getopt(
                    sys.argv[1:], "hm:r:l:s:i:",
                    ["help", "mode=", "R18=", "list=", "sex=", "id="])
            except getopt.GetoptError as err:
                # report the bad command line instead of dying with a traceback
                dl.LT_PRINT(dl.BR_CB('error: %s' % str(err)))
                print(dl.HL_CR(WkvCwApi.__doc__))
                sys.exit(dl.PUB_E_PARAM_FAIL)

            for opt, value in opts:
                if opt in ("-m", "--mode"):
                    select_option = value
                elif opt in ("-r", "--R18"):
                    rtn_page_opt = value
                elif opt in ("-l", "--list"):
                    rtn_rank_opt = value
                elif opt in ("-s", "--sex"):
                    rtn_sex_opt = value
                elif opt in ("-i", "--id"):
                    # server mode supports multiple ids separated by ','
                    ira_illust_id_list = value.split(',')
                elif opt in ("-h", "--help"):
                    print(dl.HL_CR(WkvCwApi.__doc__))
                    sys.exit(dl.PUB_E_OK)

        if select_option == dl.SELECT_RTN:
            dl.LT_PRINT(dl.BY_CB('mode: [Ranking Top N]'))
            rtn_instance = rtn(dl.RANK_DIR, dl.LOG_PATH, dl.HTML_PATH, api_instance,
                               mode_interactive_server, rtn_page_opt, rtn_rank_opt, rtn_sex_opt)
            rtn_instance.start()
        elif select_option == dl.SELECT_IRA:
            dl.LT_PRINT(dl.BY_CB('mode: [Illustrator Repository All]'))
            if mode_interactive_server == dl.MODE_SERVER:
                # one crawl task per id given on the command line
                for ira_illust_id in ira_illust_id_list:
                    ira_instance = ira(dl.g_dl_work_dir, dl.LOG_NAME, dl.HTML_NAME, api_instance,
                                       mode_interactive_server, ira_illust_id)
                    ira_instance.start()
            else:
                ira_instance = ira(dl.g_dl_work_dir, dl.LOG_NAME, dl.HTML_NAME, api_instance,
                                   mode_interactive_server, '')
                ira_instance.start()
        elif select_option == dl.SELECT_HELP:
            print(dl.HL_CR(WkvCwApi.__doc__))
        elif select_option == dl.SELECT_EXIT:
            dl.LT_PRINT(dl.BY_CB("user exit program"))
            dl.crawler_logo()  # print the logo on exit
            sys.exit(dl.PUB_E_OK)
        else:
            dl.nolog_raise_arguerr()

        # server mode runs the selected mode exactly once
        if mode_interactive_server == dl.MODE_SERVER:
            sys.exit(dl.PUB_E_OK)