def parse_public_key(self, response):
    """Store the RSA public key material returned by the server.

    The response body is a JSONP-style wrapper: the text captured between
    the parentheses is taken, its single quotes are swapped for double
    quotes so that it parses as JSON, and the ``pubkey`` and ``key`` fields
    are saved on the instance.

    Yields:
        Whatever ``__monitor_captcha`` builds for the current username.
    """
    wrapper_match = re.search(r'.*?\((.*)\)', response.body)
    payload = wrapper_match.group(1).replace("'", '"')
    utils.debug('获取Publickey:', payload)

    key_info = json.loads(payload)
    self.__pubkey = key_info['pubkey']
    self.__rsakey = key_info['key']

    yield self.__monitor_captcha(self.__get_username())
def parse_token(self, response):
    """Store the login token returned by the server.

    The response body is a JSONP-style wrapper: the text captured between
    the parentheses is taken, its single quotes are swapped for double
    quotes so that it parses as JSON, and ``data.token`` is saved on the
    instance.

    Yields:
        Whatever ``__get_rsa_key`` builds as the next step.
    """
    wrapper_match = re.search(r'.*?\((.*)\)', response.body)
    payload = wrapper_match.group(1).replace("'", '"')
    utils.debug('获取Token:', payload)

    token_info = json.loads(payload)
    self.__token = token_info['data']['token']

    yield self.__get_rsa_key()
def parse_post_note(self, response):
    """Handle the JSON reply to a "post new thread" request.

    On success (``no == 0``) a ``TiebaItem`` of type 1 describing the new
    thread is yielded, then, after a two second pause, the result of
    ``post_reply`` for the new thread id.

    On failure the error is logged. When ``no == 40`` the captcha named in
    the reply is shown to the user, and both a captcha check and a retry of
    ``post_note`` with the typed captcha are yielded.
    """
    result = json.loads(response.body)
    no = int(result['no'])

    if no != 0:
        err_code = int(result['err_code'])
        utils.debug('发帖失败:', get_post_err_msg(no, err_code, response.body))
        if no == 40:
            vcode = result['data']['vcode']
            input_captcha = utils.show_captcha(vcode['captcha_vcode_str'])
            captcha_type = vcode['captcha_code_type']
            yield self.__check_captcha(captcha=input_captcha,
                                       captcha_type=captcha_type)
            yield self.post_note(input_captcha)
        return

    utils.debug('发帖成功:', json.dumps(result['data']))
    tid = result['data']['tid']

    # The request that produced this response carries the posted fields
    # under meta['datas']; copy them into the record for the item.
    meta = response.meta['datas']
    note = {
        'fid': meta['fid'],
        'id': tid,
        'kw': meta['kw'],
        'tbs': meta['tbs'],
        'title': meta['title'],
        'content': meta['content'],
        'timestamp': utils.timestamp,
    }

    item = TiebaItem(type=1)
    item['note'] = note
    yield item

    time.sleep(2)
    yield self.post_reply(tid)
def parse_monitor_captcha(self, response):
    """Decide whether a captcha must be fetched before logging in.

    The JSONP-style body is unwrapped (text captured between the
    parentheses, single quotes swapped for double quotes) and its
    ``codestring`` field is stored on the instance.

    Yields:
        The result of ``__get_captcha`` when a code string is present,
        otherwise the result of ``__login``. A value of ``None`` or the
        literal string ``'null'`` counts as "not present".
    """
    wrapper_match = re.search(r'.*?\((.*)\)', response.body)
    payload = wrapper_match.group(1).replace("'", '"')
    utils.debug('验证码:', payload)

    self.__code_string = json.loads(payload)['codestring']

    if self.__code_string is not None and self.__code_string != 'null':
        yield self.__get_captcha(self.__code_string)
    else:
        yield self.__login()
def parse_tieba_index(self, response):
    """Extract the user info and the ``tbs`` value from the index page.

    ``PageData.user = {...};`` is located in the body (with CR/LF removed),
    converted into parseable JSON by swapping single quotes for double
    quotes and dropping ``/* ... */`` comments, and stored in
    ``self.user_info``. ``PageData.tbs = "...";`` is read from the stripped
    body and stored in ``self.tbs``.

    Yields:
        Every request produced by ``parse_index_response(response)``.
    """
    flattened = response.body.replace('\r', '').replace('\n', '').strip()

    user_match = re.search(r'PageData\.user = {(.*?)};', flattened)
    user_json = '{%s}' % user_match.group(1)
    user_json = user_json.replace("'", '"')
    block_comment = re.compile(r'/\*.*?\*/', re.I)
    user_json = block_comment.sub('', user_json)
    utils.debug('用户信息:', user_json)
    self.user_info = json.loads(user_json)

    tbs_match = re.search(r'PageData\.tbs = "(.*?)";', response.body.strip())
    self.tbs = tbs_match.group(1)

    for request in self.parse_index_response(response):
        yield request
def parse_tieba_datas(self, response):
    """Extract the forum id and name from a forum page.

    When ``PageData.forum = {...};`` is found in the body (with CR/LF
    removed), it is converted into parseable JSON by swapping single quotes
    for double quotes and dropping ``/* ... */`` comments; ``id`` and
    ``name`` are stored in ``self.fid`` and ``self.kw`` and the result of
    ``post_note_or_reply`` is yielded.

    When it is not found, the user is prompted for a forum name, a new
    ``InputPost`` is created for it, and the result of
    ``__get_teiba_datas`` for that name is yielded.
    """
    flattened = response.body.replace('\r', '').replace('\n', '').strip()
    forum_match = re.search(r'PageData\.forum = {(.*?)};', flattened)

    if not forum_match:
        # No forum data on the page: ask for the forum name and try again.
        self.kw = raw_input('请输入贴吧名字:')
        self.post_datas = InputPost(self.kw)
        yield self.__get_teiba_datas(kw=self.kw)
        return

    forum_json = '{%s}' % forum_match.group(1)
    forum_json = forum_json.replace("'", '"')
    block_comment = re.compile(r'/\*.*?\*/', re.I)
    forum_json = block_comment.sub('', forum_json)
    utils.debug('贴吧信息:', forum_json)

    forum = json.loads(forum_json)
    self.fid = forum['id']
    self.kw = forum['name']
    yield self.post_note_or_reply()
def parse_onekey_signin(self, response):
    """Handle the JSON reply to the one-key sign-in request.

    Success (``no == 0``) is logged. On failure the reason is logged, taken
    from ``data['str_reason']`` when present and from the top-level
    ``error`` field otherwise. When ``no == 2150040`` the sign-in is
    retried with a fixed captcha answer.

    Yields:
        A retry of ``__onekey_signin`` in the captcha case, and finally the
        result of ``__get_my_forums``.
    """
    reply = json.loads(response.text)
    no = int(reply['no'])
    data = reply['data']

    if no == 0:
        utils.debug('签到成功:', json.dumps(data))
    else:
        if 'str_reason' in data:
            error = data['str_reason']
        else:
            error = reply['error']
        utils.debug('签到失败:', error, strip=False)

        if no == 2150040:
            # This is the human-verification captcha; there is no way to
            # solve it for now, so it is left as is.
            captcha_vcode_str = data['captcha_vcode_str']
            # captcha_vcode_type = data['captcha_code_type']
            input_captcha = '00010001000100000002000100020000'
            # input_captcha is a fixed value, so the '1' case below cannot
            # currently be reached.
            if input_captcha != '1':
                yield self.__onekey_signin(self.tbs, input_captcha,
                                           captcha_vcode_str)
            else:
                yield self.__onekey_signin(self.tbs)

    yield self.__get_my_forums()
def parse_login(self, response):
    """Handle the reply to the login request.

    ``err_no`` is read from the response text and ``codeString`` from the
    response body; the latter is stored on the instance.

    * ``0``: the stored password is cleared and every request from
      ``parse_login_success`` is yielded.
    * ``1``, ``2``, ``4``, ``53``, ``58``: the stored password is cleared
      (and, for every code except ``4``, the stored username as well), then
      ``__login`` is retried with the code string.
    * ``6``, ``257``: ``__get_captcha`` is yielded for the code string.
    * anything else: username and password are cleared on the instance and
      nothing is yielded.
    """
    errno = int(re.search(r'err_no=(\d+)', response.text).group(1))
    code_match = re.search(r'&codeString=(.*?)&', response.body)
    self.__code_string = code_match.group(1)

    if errno == 0:
        self.__password = None
        utils.debug('登录成功')
        for request in self.parse_login_success(response):
            yield request
        return

    utils.debug(get_login_err_msg(errno, response.body))

    if errno in (1, 2, 4, 53, 58):
        if errno != 4:
            self.__username = None
            utils.set_account(username='')
        self.__password = None
        utils.set_account(password='')
        yield self.__login(self.__code_string)
    elif errno in (6, 257):
        yield self.__get_captcha(self.__code_string)
    else:
        self.__username = None
        self.__password = None
def parse_post_reply(self, response):
    """Handle the JSON reply to a "post reply" request.

    The thread id is taken from ``data['tid']`` of the reply and defaults
    to 0 when that key is absent.

    On success (``no == 0`` with a non-zero thread id) a ``TiebaItem`` of
    type 2 describing the reply is yielded, then, after a 60 second pause,
    another ``post_reply`` for the same thread.

    Otherwise the error is logged. Retries happen only when the reply
    itself carried a thread id: ``no == 220034`` waits 300 seconds and
    posts again; ``no == 40`` shows the captcha to the user and yields a
    captcha check followed by ``post_reply`` with the typed captcha.
    """
    result = json.loads(response.body)
    no = int(result['no'])
    payload = result['data']
    if 'tid' in payload:
        tid = int(payload['tid'])
    else:
        tid = 0

    if no == 0 and tid != 0:
        utils.debug('评论成功:', json.dumps(result['data']))

        # The request that produced this response carries the posted
        # fields under meta['datas']; copy them into the record.
        meta = response.meta['datas']
        reply = {
            'fid': meta['fid'],
            'kw': meta['kw'],
            'tbs': meta['tbs'],
            'tid': meta['tid'],
            'content': meta['content'],
            'timestamp': utils.timestamp,
        }

        item = TiebaItem(type=2)
        item['reply'] = reply
        yield item

        time.sleep(60)
        yield self.post_reply(tid)
        return

    err_code = int(result['err_code'])
    utils.debug('评论失败:', get_post_err_msg(no, err_code, response.body))

    if tid == 0:
        # Without a thread id there is nothing to retry against.
        return

    if no == 220034:
        time.sleep(300)
        yield self.post_reply(tid)

    if no == 40:
        vcode = result['data']['vcode']
        input_captcha = utils.show_captcha(vcode['captcha_vcode_str'])
        captcha_type = vcode['captcha_code_type']
        yield self.__check_captcha(captcha=input_captcha,
                                   captcha_type=captcha_type)
        yield self.post_reply(tid, input_captcha)
def parse_single_signin(self, response):
    """Handle the JSON reply to a single-forum sign-in request.

    The forum name (``kw``) and ``tbs`` value are recovered from the
    form-encoded body of the request that produced this response.

    Success (``no == 0``) is logged together with the forum name. On
    failure the reason is logged, taken from ``data['str_reason']`` when
    present and from the top-level ``error`` field otherwise. When
    ``no == 2150040`` the sign-in is retried with a fixed captcha answer.

    Yields:
        A retry of ``__single_signin`` in the captcha case; nothing
        otherwise.
    """
    reply = json.loads(response.text)
    no = int(reply['no'])
    data = reply['data']

    # parse_qs already percent-decodes each value. Decoding the whole body
    # beforehand would decode twice and corrupt values that contain an
    # encoded '+', '&', '=' or '%' (e.g. 'kw=c%2B%2B' would become 'c  ').
    form = urlparse.parse_qs(response.request.body)
    forum_name = form['kw'][0]
    tbs = form['tbs'][0]

    if no == 0:
        utils.debug('签到成功:', forum_name)
        return

    if 'str_reason' in data:
        error = data['str_reason']
    else:
        error = reply['error']
    utils.debug('签到失败:', error, '--->', forum_name, strip=False)

    if no == 2150040:
        # This is the human-verification captcha; there is no way to solve
        # it for now, so it is left as is.
        captcha_vcode_str = data['captcha_vcode_str']
        # captcha_vcode_type = data['captcha_code_type']
        print(captcha_vcode_str)
        input_captcha = '00010001000100000002000100020000'
        # input_captcha is a fixed value, so the '1' case below cannot
        # currently be reached.
        if input_captcha == '1':
            yield self.__single_signin(tbs, forum_name)
        else:
            yield self.__single_signin(tbs, forum_name, input_captcha,
                                       captcha_vcode_str)
def parse_check_captcha(self, response):
    """Log whether the submitted captcha was accepted.

    Reads ``anti_valve_err_no`` from the JSON body: zero is reported as a
    correct captcha, any other value as a wrong one.
    """
    reply = json.loads(response.body)
    err_no = int(reply['anti_valve_err_no'])
    if err_no == 0:
        verdict = '验证码正确'
    else:
        verdict = '验证码错误'
    utils.debug(verdict)