Exemplo n.º 1
0
    def rtn_gather_essential_info(page_opt, whole_nbr):
        """Ask the user how many ranking images should be crawled

        The prompt is repeated until a decimal number is entered
        If the entered number is more than the whole number, the whole number is used
        Only interactive mode calls this function
        :param page_opt:    select ranktop ordinary, r18 or r18g mode
        :param whole_nbr:   whole ranking crawl count
        :return:            crawl images count, or dl.PUB_E_PARAM_FAIL when
                            page_opt is unknown or the entered count is zero
        """
        if page_opt == dl.PAGE_ORDINARY:
            label = 'ordinary'
        elif page_opt == dl.PAGE_R18:
            label = 'r18'
        elif page_opt == dl.PAGE_R18G:
            label = 'r18g'
        else:
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL

        img_str = dl.LT_INPUT(dl.HL_CY('crawl %s valid target %d, enter you want: ' % (label, whole_nbr)))

        # isdecimal() instead of isdigit(): isdigit() also accepts characters
        # like superscript digits, which int() below refuses to convert
        while not img_str.isdecimal():
            img_str = dl.LT_INPUT(dl.HL_CY('input error, enter again(max is %d): ' % whole_nbr))

        img_cnt = int(img_str)
        if img_cnt <= 0:
            dl.LT_PRINT(dl.BR_CB('what the f**k is wrong with you?'))
            return dl.PUB_E_PARAM_FAIL

        # never hand back more than the ranking really offers
        return min(img_cnt, whole_nbr)
Exemplo n.º 2
0
    def __init__(self, workdir, log_name, html_name, wkv_cw_api, ir_mode, ext_id=''):
        """Prepare the paths and state used to crawl one illustrator repository

        In interactive mode the illustrator id is asked from the user,
        in server mode it is taken from ext_id
        :param workdir:     work directory
        :param log_name:    log name
        :param html_name:   html name
        :param wkv_cw_api:  API library class instance
        :param ir_mode:     interactive mode or server mode
        :param ext_id:      external illustrator id
        :raises ValueError: ir_mode is neither interactive nor server mode
        """
        if ir_mode == dl.MODE_INTERACTIVE:
            self.user_input_id  = dl.LT_INPUT(dl.HL_CY('target crawl illustrator pixiv-id: '))
        elif ir_mode == dl.MODE_SERVER:
            self.user_input_id  = ext_id
        else:
            # without a known mode there is no illustrator id to build the
            # paths from, so stop here instead of failing later on a missing
            # attribute
            dl.nolog_raise_arguerr()
            raise ValueError('unknown ir_mode: %r' % (ir_mode,))

        self.workdir            = workdir + 'illustrepo_' + self.user_input_id
        self.logpath            = self.workdir + log_name
        self.htmlpath           = self.workdir + html_name
        # the caller's API instance is shared, no second instance is created here
        self.wkv_cw_api         = wkv_cw_api    # expected type: WkvCwApi
        self.ir_mode            = ir_mode
        # class inside call global variable
        self.ira_author_name    = None
        self.ira_max_cnt        = 0
        self.ira_pure_idlist    = []
        self.ira_target_capture = []
        self.ira_basepages      = []
Exemplo n.º 3
0
    def _login_preload(self, aes_file_path):
        """Load the login info from the AES file, or ask the user for it

        When the AES file exists, its first three lines are read as the IV
        parameter, the encrypted user mail and the encrypted password, and both
        values are decrypted with AES in CFB mode using dl.AES_SECRET_KEY
        In interactive mode the decrypted account is shown for confirmation,
        answering 'N' or 'n' removes the file and asks for new login info
        The result is stored in self.username, self.passwd and self.getway_data
        This method uses pycrypto and needs the external import
        NOTE(review): the text around the two password prompts looks garbled
        (a run of '*' in the middle of a statement), so the branch for a missing
        file and the place where a new AES file is written cannot be read from
        here, confirm against the upstream source
        :param aes_file_path:       .aes_crypto_login.ini file path
        :return:                    none
        """
        if os.path.exists(aes_file_path):
            # read all rows of the file, rows 0-2 hold iv, user mail and password
            read_aes_file = open(aes_file_path, 'rb+')
            readline_cache = read_aes_file.readlines()  # list of all lines, bytes because of the binary mode
            read_aes_file.close()

            # get aes file storage info and cut the last byte (the tail '\n') of every row
            aes_info = {
                'iv_param': readline_cache[0][:-1],
                'user_mail': readline_cache[1][:-1],
                'passwd': readline_cache[2][:-1]
            }

            # decrypt the stored values back to UTF-8 strings
            # the first AES.block_size bytes of each stored value are skipped,
            # presumably they hold a copy of the IV - TODO confirm
            username_aes_decrypt_cipher = AES.new(dl.AES_SECRET_KEY,
                                                  AES.MODE_CFB,
                                                  aes_info['iv_param'])
            username = str(
                username_aes_decrypt_cipher.decrypt(
                    aes_info['user_mail'][AES.block_size:]), 'UTF-8')
            # a second cipher object is created with the same key and iv for the password
            password_aes_decrypt_cipher = AES.new(dl.AES_SECRET_KEY,
                                                  AES.MODE_CFB,
                                                  aes_info['iv_param'])
            passwd = str(
                password_aes_decrypt_cipher.decrypt(
                    aes_info['passwd'][AES.block_size:]), 'UTF-8')

            # only interactive mode asks the user to confirm the decrypted account
            if self.ir_mode == dl.MODE_INTERACTIVE:
                check = dl.LT_INPUT(
                    dl.HL_CY("get user account info ok, check: \n"
                             "[*username] %s\n[*password] %s\n"
                             "Is that correct? (Y/N): " % (username, passwd)))

                # if the user judges the info wrong, delete the old AES file and record new info
                if check == 'N' or check == 'n':
                    os.remove(aes_file_path)  # delete old AES file
                    # temporarily enter login information
                    dl.LT_PRINT(
                        dl.BY_CB(
                            "Well, you need hand-input your login data: "))
                    username = dl.LT_INPUT(
                        dl.HL_CY(
                            'enter your pixiv id(mailbox), must be a R18: '))
                    # NOTE(review): the statement below is cut by a run of '*',
                    # part of the original code is missing from here on
                    passwd = getpass.getpass(
                        dl.realtime_logword(dl.base_time) +
                        dl.HL_CY('enter your account password: '******'enter your pixiv id(mailbox), must be a R18: '))
            passwd = getpass.getpass(
                dl.realtime_logword(dl.base_time) +
                dl.HL_CY('enter your account password: '******'user', username), ('pass', passwd)]
        # url-encode the login pairs and turn the result into UTF8 bytes
        getway_data = urllib.parse.urlencode(getway_register).encode(
            encoding='UTF8')

        # keep the login info on the instance
        self.username = username
        self.passwd = passwd
        self.getway_data = getway_data
Exemplo n.º 4
0
    def rtn_target_confirm(self):
        """Pick the ranking url that matches the selected options

        Interactive mode asks the user for every option, server mode takes
        them from the instance attributes
        The chosen url is logged, then stored together with the page option
        in self.rtn_req_url and self.page_opt
        :return:        dl.PUB_E_OK, or dl.PUB_E_PARAM_FAIL on an unknown mode or option
        """
        def bad_argument():
            # report the argument error and hand back the failure status code
            dl.nolog_raise_arguerr()
            return dl.PUB_E_PARAM_FAIL

        def daily_by_sex(favor, normal, male, female):
            # choose the (url, word) pair of a daily ranking by sex favor,
            # None means the favor is not a known one
            if favor == dl.SEX_NORMAL:
                return normal
            if favor == dl.SEX_MALE:
                return male
            if favor == dl.SEX_FEMALE:
                return female
            return None

        asking = self.ir_mode == dl.MODE_INTERACTIVE
        if asking:
            page_opt = dl.LT_INPUT(dl.HL_CY('select ranking type, ordinary(1) | r18(2) | r18g(3): '))
            sex_opt = dl.LT_INPUT(dl.HL_CY('select sex favor, normal(0) | male(1) | female(2): '))
        elif self.ir_mode == dl.MODE_SERVER:
            page_opt = self.rtn_r18_arg
            sex_opt = self.rtn_sex_opt
        else:
            return bad_argument()

        # (request url, ranking word) of the selection, None while nothing valid is chosen
        chosen = None

        if page_opt == dl.PAGE_ORDINARY:
            # daily/weekly/monthly: past this point the mode is interactive or server
            if asking:
                period = dl.LT_INPUT(dl.HL_CY('select daily(1) | weekly(2) | monthly(3) ordinary ranking type: '))
            else:
                period = self.rtn_rank_type

            if period == dl.RANK_DAILY:
                chosen = daily_by_sex(sex_opt,
                                      (dl.RANK_DAILY_URL, dl.DAILY_WORD),
                                      (dl.RANK_DAILY_MALE_URL, dl.MALE_WORD),
                                      (dl.RANK_DAILY_FEMALE_URL, dl.FEMALE_WORD))
            elif period == dl.RANK_WEEKLY:
                chosen = (dl.RANK_WEEKLY_URL, dl.WEEKLY_WORD)
            elif period == dl.RANK_MONTHLY:
                chosen = (dl.RANK_MONTHLY_URL, dl.MONTHLY_WORD)

        elif page_opt == dl.PAGE_R18:
            # the R18 ranking only offers daily and weekly
            if asking:
                period = dl.LT_INPUT(dl.HL_CY('select daily(1)/weekly(2) R18 ranking type: '))
            else:
                period = self.rtn_rank_type

            if period == dl.RANK_DAILY:
                chosen = daily_by_sex(sex_opt,
                                      (dl.RANK_DAILY_R18_URL, dl.DAILY_WORD),
                                      (dl.RANK_DAILY_MALE_R18_URL, dl.MALE_WORD),
                                      (dl.RANK_DAILY_FEMALE_R18_URL, dl.FEMALE_WORD))
            elif period == dl.RANK_WEEKLY:
                chosen = (dl.RANK_WEEKLY_R18_URL, dl.WEEKLY_WORD)

        elif page_opt == dl.PAGE_R18G:
            # r18g has one ranking only, the sex favor is not looked at
            chosen = (dl.RANK_R18G_URL, dl.R18G_WORD)
            dl.LT_PRINT(dl.BR_CB('warning: you choose the r18g rank, hope you know what it means'))

        if chosen is None:
            return bad_argument()

        # the ranking word is resolved with the url but not used in this method
        req_url, _rank_word = chosen

        self.wkv_cw_api.wca_logprowork(
            self.logpath,
            dl.BY_CB('base select option, set rank target url: [%s]' % req_url))
        self.rtn_req_url = req_url
        self.page_opt = page_opt

        return dl.PUB_E_OK
Exemplo n.º 5
0
    def ira_crawl_allpage_target(self):
        """Gather all original image urls of the illustrator and select the crawl targets

        The pure id list is cut into subpages of dl.ONE_PAGE_COMMIT ids, every
        subpage is requested with self.ira_crawl_subpage_data(), and one
        original url is built for every page of every work
        Interactive mode then asks how many images are wanted, server mode
        takes all of them
        The selected urls are appended to self.ira_target_capture and their
        base pages to self.ira_basepages, and a table of them is logged
        :return:            dl.PUB_E_OK on success,
                            dl.PUB_E_FAIL when a subpage request or a page count is bad,
                            dl.PUB_E_PARAM_FAIL when the user asks for zero images
        """
        if self.ira_max_cnt <= dl.ONE_PAGE_COMMIT:
            require_page_cnt = 1
        else:
            # integer ceiling division, a rest of ids needs one more page
            require_page_cnt = -(-self.ira_max_cnt // dl.ONE_PAGE_COMMIT)

        # build the json data url of every subpage
        subpage_url_list = []
        for ix in range(require_page_cnt):
            head_nbr = dl.ONE_PAGE_COMMIT * ix
            # the last subpage stops at the max count
            tail_nbr = min(dl.ONE_PAGE_COMMIT * (ix + 1), self.ira_max_cnt)
            iid_string_tail = ''.join(
                dl.IDS_UNIT(index) for index in self.ira_pure_idlist[head_nbr:tail_nbr])
            # the last argument is 1 for the first subpage only
            subpage_url_list.append(
                dl.ALLREPOINFO_URL(self.user_input_id, iid_string_tail, 1 if ix == 0 else 0))

        # get all data from response xhr page into a temp list
        tmp_receive_list = []
        for page_nbr, subpage_url in enumerate(subpage_url_list, start=1):
            tmp_ret = self.ira_crawl_subpage_data(page_nbr, subpage_url)
            if not isinstance(tmp_ret, list):
                return dl.PUB_E_FAIL
            tmp_receive_list += tmp_ret

        # item layout used below: [0] image id, [1] title, [2] url, [3] page count string
        repo_target_all_list = []
        for item in tmp_receive_list:
            item[1] = dl.EMOJI_REPLACE(dl.UNICODE_ESCAPE(item[1]))
            # build the first original url from the received url with the
            # backslashes removed, image format is set to png
            received_url = item[2].replace('\\', '')
            item[2] = dl.ORIGINAL_IMAGE_HEAD + received_url[-39:-7] + '.png'
            repo_target_all_list.append(item)

            # add other original image url by pageCount
            page_count_str = item[3]
            if not page_count_str.isdecimal():
                log_content = dl.BR_CB('page count process error')
                self.wkv_cw_api.wca_logprowork(self.logpath, log_content)
                return dl.PUB_E_FAIL
            # the url of the first page is in the list already, so the extra
            # pages start at index 1, starting at 0 would add it a second time
            for px in range(1, int(page_count_str)):
                # swap the one-character page index in front of '.png' for px
                repo_target_all_list.append(
                    [item[0], item[1], item[2][:-5] + str(px) + '.png', item[3]])

        alive_target_cnt = len(repo_target_all_list)
        require_img_nbr = 0

        if self.ir_mode == dl.MODE_INTERACTIVE:
            require_img_str = dl.LT_INPUT(dl.HL_CY('crawl all repo %d, whole target(s): %d, enter you want count: '
                % (self.ira_max_cnt, alive_target_cnt)))
            # ask again while the input can not be converted by int(),
            # isdigit() would let through characters like superscript digits
            while not require_img_str.isdecimal():
                dl.LT_PRINT(dl.BR_CB('input error, your input content was not a decimal number'))
                require_img_str = dl.LT_INPUT(dl.HL_CY('enter again(max is %d): ' % alive_target_cnt))
            require_img_nbr = int(require_img_str)
            if require_img_nbr <= 0:
                dl.LT_PRINT(dl.BR_CB('what the f**k is wrong with you?'))
                return dl.PUB_E_PARAM_FAIL
            require_img_nbr = min(require_img_nbr, alive_target_cnt)

        elif self.ir_mode == dl.MODE_SERVER:
            require_img_nbr = alive_target_cnt
            dl.LT_PRINT(dl.BY_CB('server mode auto crawl all of alive targets'))
        # any other mode keeps require_img_nbr at 0, nothing is selected then

        selected_list = repo_target_all_list[:require_img_nbr]
        for item in selected_list:
            self.ira_target_capture.append(item[2])
            self.ira_basepages.append(dl.BASEPAGE_URL(item[0]))

        log_content = 'illustrator [%s] id [%s], require image(s): %d, target table:' \
            % (self.ira_author_name, self.user_input_id, require_img_nbr)
        self.wkv_cw_api.wca_logprowork(self.logpath, log_content)

        image_info_table = PrettyTable(["ImageNumber", "ImageID", "ImageTitle", "ImagePageName"])
        for row_nbr, item in enumerate(selected_list, start=1):
            image_info_table.add_row([row_nbr, item[0], item[1], dl.FROM_URL_GET_IMG_NAME(item[2])])

        # dumping the table is best effort, a title with emoji may fail here
        # and must not stop the crawl
        try:
            self.wkv_cw_api.wca_logprowork(self.logpath, str(image_info_table), False)
        except Exception as e:
            dl.LT_PRINT(dl.BR_CB('error: %s, dump prettytable interrupt' % str(e)))

        return dl.PUB_E_OK
Exemplo n.º 6
0
def main():
    """main logic

    Print the help text, log in, then launch the selected mode function
    Without command line arguments the program runs in interactive mode and
    asks for a mode again after every run, with arguments it runs in server
    mode, takes the options from the command line and exits after one run
    Server mode options (short and long form take the same value):
        -h, --help      print the help text and exit
        -m, --mode      mode selection
        -r, --R18       ranking page option
        -l, --list      ranking type option
        -s, --sex       sex favor option
        -i, --id        illustrator id(s), split with ','
    :return:    none
    """
    select_option = dl.SELECT_RTN
    rtn_page_opt = dl.PAGE_ORDINARY
    rtn_rank_opt = dl.RANK_DAILY
    rtn_sex_opt = dl.SEX_NORMAL
    ira_illust_id_list = []

    print(dl.HL_CR(WkvCwApi.__doc__))
    mode_interactive_server = dl.MODE_INTERACTIVE if len(
        sys.argv) == 1 else dl.MODE_SERVER
    api_instance = WkvCwApi(mode_interactive_server)
    api_instance.wca_camouflage_login()

    while True:
        if mode_interactive_server == dl.MODE_INTERACTIVE:
            select_option = dl.LT_INPUT(
                dl.HL_CY('login completed, select mode: '))
        else:
            # a long option that takes a value needs the trailing '=',
            # without it getopt refuses '--mode=1' and reads '--mode 1' as a
            # flag followed by a positional argument
            opts, _args = getopt.getopt(
                sys.argv[1:], "hm:r:l:s:i:",
                ["help", "mode=", "R18=", "list=", "sex=", "id="])
            for opt, value in opts:
                if opt in ("-m", "--mode"):
                    select_option = value
                elif opt in ("-r", "--R18"):
                    rtn_page_opt = value
                elif opt in ("-l", "--list"):
                    rtn_rank_opt = value
                elif opt in ("-s", "--sex"):
                    rtn_sex_opt = value
                elif opt in ("-i", "--id"):
                    # server mode support multi-input id and split with ','
                    ira_illust_id_list = value.split(',')
                elif opt in ("-h", "--help"):
                    print(dl.HL_CR(WkvCwApi.__doc__))
                    sys.exit(dl.PUB_E_OK)

        if select_option == dl.SELECT_RTN:
            dl.LT_PRINT(dl.BY_CB('mode: [Ranking Top N]'))
            rtn_instance = rtn(dl.RANK_DIR, dl.LOG_PATH, dl.HTML_PATH,
                               api_instance, mode_interactive_server,
                               rtn_page_opt, rtn_rank_opt, rtn_sex_opt)
            rtn_instance.start()

        elif select_option == dl.SELECT_IRA:
            dl.LT_PRINT(dl.BY_CB('mode: [Illustrator Repository All]'))
            if mode_interactive_server == dl.MODE_SERVER:
                # one crawl instance per id given on the command line
                for ira_illust_id in ira_illust_id_list:
                    ira_instance = ira(dl.g_dl_work_dir, dl.LOG_NAME,
                                       dl.HTML_NAME, api_instance,
                                       mode_interactive_server, ira_illust_id)
                    ira_instance.start()
            else:
                # interactive mode passes an empty external id
                ira_instance = ira(dl.g_dl_work_dir, dl.LOG_NAME, dl.HTML_NAME,
                                   api_instance, mode_interactive_server, '')
                ira_instance.start()

        elif select_option == dl.SELECT_HELP:
            print(dl.HL_CR(WkvCwApi.__doc__))

        elif select_option == dl.SELECT_EXIT:
            dl.LT_PRINT(dl.BY_CB("user exit program"))
            dl.crawler_logo()  # exit print logo
            sys.exit(dl.PUB_E_OK)

        else:
            dl.nolog_raise_arguerr()

        # server mode runs once, only interactive mode loops back to the prompt
        if mode_interactive_server == dl.MODE_SERVER:
            sys.exit(dl.PUB_E_OK)