def check_index_range(config, start_name, start_index, end_name, end_index,
                      max_value=None) -> None:
    """Validate that ``start_index..end_index`` is a well-formed 1-based range.

    Both indexes must be integers, each at least 1, with
    ``start_index <= end_index``. When ``max_value`` is given, neither index
    may exceed it.

    Args:
        config: Configuration object forwarded to ``check`` and ``log.merge``.
        start_name: Display name of the start parameter (used in messages).
        start_index: Start of the range.
        end_name: Display name of the end parameter (used in messages).
        end_index: End of the range.
        max_value: Optional inclusive upper bound for both indexes.

    Raises:
        TypeError: Raised by ``check`` when an index is not an ``int``.
        ValueError: When the range is out of bounds or inverted.
    """
    check(config, int, start_name, start_index)
    check(config, int, end_name, end_index)

    if start_index < 1:
        # Report the offending value, not the parameter name twice.
        raise ValueError(
            log.merge(config, [
                start_name,
                str(start_index),
                i18n.ErrorParameter,
                i18n.OutOfRange,
            ]))

    if end_index < 1:
        raise ValueError(
            log.merge(config, [
                end_name,
                str(end_index),
                i18n.ErrorParameter,
                i18n.OutOfRange,
            ]))

    if start_index > end_index:
        raise ValueError(
            log.merge(config, [
                start_name,
                str(start_index),
                i18n.MustSmallOrEqual,
                end_name,
                str(end_index),
            ]))

    if max_value is not None:
        if start_index > max_value:
            raise ValueError(
                log.merge(config, [
                    start_name,
                    str(start_index),
                    i18n.ErrorParameter,
                    i18n.OutOfRange,
                    str(max_value),
                ]))

        if end_index > max_value:
            # Include the offending value, matching the start-index message.
            raise ValueError(
                log.merge(config, [
                    end_name,
                    str(end_index),
                    i18n.ErrorParameter,
                    i18n.OutOfRange,
                    str(max_value),
                ]))
def reply_post(self, reply_type: int, board: str, content: str, sign_file=0,
               post_aid: str = None, post_index: int = 0) -> None:
    """Validate the arguments and delegate a reply to ``_api_reply_post``.

    The target post is identified either by ``post_aid`` or by
    ``post_index``; supplying both is rejected.

    Raises:
        exceptions.Requirelogin: When the session is not logged in.
        ValueError: When ``sign_file`` is not one of ``0``-``9`` or ``'x'``,
            or when both ``post_aid`` and ``post_index`` are supplied.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, int, 'reply_type', reply_type,
                      value_class=data_type.reply_type)
    check_value.check(self.config, str, 'Board', board)
    check_value.check(self.config, str, 'Content', content)
    if post_aid is not None:
        check_value.check(self.config, str, 'PostAID', post_aid)

    if post_index != 0:
        # The index must fall inside the board's current post range.
        latest = self._get_newest_index(data_type.index_type.BBS, board=board)
        check_value.check_index(self.config, 'PostIndex', post_index,
                                max_value=latest)

    # Accepted signature-file selectors: the digits 0-9 and 'x'.
    allowed_sign_files = [str(digit) for digit in range(10)] + ['x']
    if str(sign_file) not in allowed_sign_files:
        raise ValueError(
            log.merge(self.config, ['SignFile', i18n.ErrorParameter]))

    both_targets_given = post_aid is not None and post_index != 0
    if both_targets_given:
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.BothInput
            ]))

    self._check_board(board)

    # Prefer the package-relative import; fall back to a top-level module.
    try:
        from . import _api_reply_post
    except ModuleNotFoundError:
        import _api_reply_post

    _api_reply_post.reply_post(self, reply_type, board, content, sign_file,
                               post_aid, post_index)
def throw_waterball(self, ptt_id, content) -> None:
    """Validate the arguments and delegate a waterball to ``_api_waterball``.

    Raises:
        exceptions.Requirelogin: When the session is not logged in.
        exceptions.UnregisteredUser: When the current account is unregistered.
        ValueError: When ``ptt_id`` is shorter than three characters.
        exceptions.UserOffline: When the target user's status reports that
            they are not online.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    for arg_name, arg_value in (('ptt_id', ptt_id), ('content', content)):
        check_value.check(self.config, str, arg_name, arg_value)

    if len(ptt_id) < 3:
        raise ValueError(
            log.merge(self.config, ['ptt_id', i18n.ErrorParameter, ptt_id]))

    # The status text contains this marker when the user is offline.
    target_user = self._get_user(ptt_id)
    if '不在站上' in target_user.status:
        raise exceptions.UserOffline(ptt_id)

    # Prefer the package-relative import; fall back to a top-level module.
    try:
        from . import _api_waterball
    except ModuleNotFoundError:
        import _api_waterball

    return _api_waterball.throw_waterball(self, ptt_id, content)
def mail(self, ptt_id: str, title: str, content: str, sign_file) -> None:
    """Validate the arguments and delegate sending a mail to ``_api_mail``.

    Args:
        ptt_id: Recipient user id.
        title: Mail title.
        content: Mail body.
        sign_file: Signature-file selector: a digit ``0``-``9`` (as ``int``
            or ``str``) or ``'x'`` (case-insensitive).

    Raises:
        exceptions.Requirelogin: When the session is not logged in.
        exceptions.UnregisteredUser: When the current account is unregistered.
        ValueError: When ``sign_file`` is not an accepted selector.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(self.config, str, 'ptt_id', ptt_id)
    check_value.check(self.config, str, 'title', title)
    check_value.check(self.config, str, 'content', content)

    is_digit_selector = any(
        str(i) == sign_file or i == sign_file for i in range(10))
    if not is_digit_selector:
        # Only strings can be case-folded; any other type (e.g. 10 or None)
        # must be rejected with ValueError instead of leaking AttributeError.
        if isinstance(sign_file, str):
            sign_file = sign_file.lower()
        if sign_file != 'x':
            raise ValueError(
                log.merge(self.config, [
                    'SignFile',
                    i18n.ErrorParameter,
                    str(sign_file)
                ]))

    # Prefer the package-relative import; fall back to a top-level module.
    try:
        from . import _api_mail
    except ModuleNotFoundError:
        import _api_mail

    _api_mail.mail(self, ptt_id, title, content, sign_file)
def check(config, value_type, name, value, value_class=None) -> None:
    """Validate the type (and optionally the allowed range) of a parameter.

    Args:
        config: Configuration object forwarded to ``log.merge``.
        value_type: Type (or tuple of types) ``value`` must be an instance of.
        name: Display name of the parameter (used in messages).
        value: The value to validate.
        value_class: Optional class passed to ``lib_util.check_range`` to
            verify that ``value`` is one of its known values.

    Raises:
        TypeError: When ``value`` is not an instance of ``value_type``.
        ValueError: When ``value_class`` is given and ``value`` is rejected
            by ``lib_util.check_range``.
    """
    if not isinstance(value, value_type):
        if value_type is str:
            expected = i18n.String
        elif value_type is int:
            expected = i18n.Integer
        elif value_type is bool:
            expected = i18n.Boolean
        else:
            # Previously a mismatch for any other type passed silently;
            # fall back to the type's own name so it is still reported.
            expected = getattr(value_type, '__name__', str(value_type))
        raise TypeError(
            log.merge(config, [name, str(value), i18n.MustBe, expected]))

    if value_class is not None and not lib_util.check_range(value_class,
                                                            value):
        raise ValueError(f'Unknown {name}', value)
def check_index(config, index_name, index, max_value=None) -> None:
    """Validate that ``index`` is an integer within ``1..max_value``.

    Args:
        config: Configuration object forwarded to ``check`` and ``log``.
        index_name: Display name of the parameter (used in messages).
        index: The 1-based index to validate.
        max_value: Optional inclusive upper bound.

    Raises:
        TypeError: Raised by ``check`` when ``index`` is not an ``int``.
        ValueError: When ``index`` is below 1 or above ``max_value``.
    """
    check(config, int, index_name, index)

    if index < 1:
        raise ValueError(
            log.merge(config, [
                index_name,
                str(index),
                i18n.ErrorParameter,
                i18n.OutOfRange,
                f'{index} must bigger than 0'
            ]))

    if max_value is not None and index > max_value:
        log.show_value(config, log.level.INFO, 'Index', index)
        log.show_value(config, log.level.INFO, 'max_value', max_value)
        # Index 0 is rejected above, so the valid range starts at 1.
        raise ValueError(
            log.merge(config, [
                index_name,
                str(index),
                i18n.ErrorParameter,
                i18n.OutOfRange,
                f'must between 1 ~ {max_value} but get {index}'
            ]))
def check_index(config, index_name, index, max_value=None) -> None:
    """Validate that ``index`` is an integer of at least 1.

    When ``max_value`` is supplied the index must also not exceed it; both
    values are logged at INFO level before the error is raised.

    Raises:
        TypeError: Raised by ``check`` when ``index`` is not an ``int``.
        ValueError: When ``index`` is below 1 or above ``max_value``.
    """
    check(config, int, index_name, index)

    if index < 1:
        too_small = [
            index_name,
            str(index),
            i18n.ErrorParameter,
            i18n.OutOfRange,
        ]
        raise ValueError(log.merge(config, too_small))

    if max_value is None:
        return

    if index > max_value:
        for label, shown in (('Index', index), ('max_value', max_value)):
            log.show_value(config, log.level.INFO, label, shown)
        too_large = [
            index_name,
            str(index),
            i18n.ErrorParameter,
            i18n.OutOfRange,
        ]
        raise ValueError(log.merge(config, too_large))
def _get_user(self, user_id) -> data_type.UserInfo:
    """Validate ``user_id`` and delegate the lookup to ``_api_get_user``.

    Raises:
        ValueError: When ``user_id`` is shorter than three characters.
    """
    check_value.check(self.config, str, 'UserID', user_id)

    id_too_short = not len(user_id) >= 3
    if id_too_short:
        message = log.merge(self.config,
                            ['UserID', i18n.ErrorParameter, user_id])
        raise ValueError(message)

    # Prefer the package-relative import; fall back to a top-level module.
    try:
        from . import _api_get_user
    except ModuleNotFoundError:
        import _api_get_user

    return _api_get_user.get_user(self, user_id)
def post(self, board: str, title: str, content: str, post_type: int,
         sign_file) -> None:
    """Validate the arguments and delegate creating a post to ``_api_post``.

    Args:
        board: Target board name.
        title: Post title.
        content: Post body.
        post_type: Integer post-type selector.
        sign_file: Signature-file selector: a digit ``0``-``9`` (as ``int``
            or ``str``) or ``'x'`` (case-insensitive).

    Raises:
        exceptions.Requirelogin: When the session is not logged in.
        ValueError: When ``sign_file`` is not an accepted selector.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'Board', board)
    check_value.check(self.config, str, 'Title', title)
    check_value.check(self.config, str, 'Content', content)
    check_value.check(self.config, int, 'PostType', post_type)

    is_digit_selector = any(
        str(i) == sign_file or i == sign_file for i in range(10))
    if not is_digit_selector:
        # Only strings can be case-folded; any other type (e.g. 10 or None)
        # must be rejected with ValueError instead of leaking AttributeError.
        if isinstance(sign_file, str):
            sign_file = sign_file.lower()
        if sign_file != 'x':
            raise ValueError(
                log.merge(self.config, [
                    'SignFile',
                    i18n.ErrorParameter,
                    str(sign_file)
                ]))

    self._check_board(board)

    # Prefer the package-relative import; fall back to a top-level module.
    try:
        from . import _api_post
    except ModuleNotFoundError:
        import _api_post

    return _api_post.post(self, board, title, content, post_type, sign_file)
def markPost(api, mark_type: int, board: str, post_aid: str, post_index: int,
             search_type: int, search_condition: str) -> None:
    """Validate the arguments, enter ``board`` and send the mark command.

    The post is located by ``post_aid`` or by ``post_index`` (optionally
    within a search result); ``mark_type`` selects the key sequence sent
    afterwards.

    Args:
        api: API object providing ``config``, ``connect_core`` and helpers.
        mark_type: One of ``data_type.mark_type``.
        board: Board name; must not be empty.
        post_aid: Post AID, or ``None``.
        post_index: 1-based post index, or ``0`` when unused.
        search_type: One of ``data_type.post_search_type``.
        search_condition: Search condition, or ``None``.

    Raises:
        ValueError: When the arguments are missing, conflicting or out of
            range.
        exceptions.HostNotSupport: When UNCONFIRMED is requested on PTT2.
    """
    log.show_value(api.config, log.level.INFO, [i18n.PTT, i18n.Msg],
                   i18n.MarkPost)

    check_value.check(api.config, int, 'mark_type', mark_type,
                      value_class=data_type.mark_type)
    check_value.check(api.config, str, 'Board', board)
    if post_aid is not None:
        check_value.check(api.config, str, 'PostAID', post_aid)
    check_value.check(api.config, int, 'PostIndex', post_index)
    check_value.check(api.config, int, 'SearchType', search_type,
                      value_class=data_type.post_search_type)
    if search_condition is not None:
        check_value.check(api.config, str, 'SearchCondition',
                          search_condition)

    if len(board) == 0:
        # log.merge takes the config first, as at every other call site.
        raise ValueError(
            log.merge(api.config, [i18n.Board, i18n.ErrorParameter, board]))

    # NOTE(review): the post-locating checks are assumed to apply only when
    # the mark type is not DeleteD — confirm the nesting against upstream.
    if mark_type != data_type.mark_type.DeleteD:
        if post_index != 0 and isinstance(post_aid, str):
            raise ValueError(
                log.merge(api.config, [
                    'PostIndex',
                    'PostAID',
                    i18n.ErrorParameter,
                    i18n.BothInput
                ]))

        if post_index == 0 and post_aid is None:
            raise ValueError(
                log.merge(api.config,
                          ['PostIndex', 'PostAID', i18n.ErrorParameter]))

        if search_condition is not None and search_type == 0:
            raise ValueError(
                log.merge(api.config, [
                    'SearchType',
                    i18n.ErrorParameter,
                ]))

        if search_type == data_type.post_search_type.PUSH:
            # A missing condition (None) makes int() raise TypeError; report
            # it as the same parameter error as a non-numeric string.
            try:
                push_threshold = int(search_condition)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    log.merge(api.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ])) from err

            if not (-100 <= push_threshold <= 110):
                raise ValueError(
                    log.merge(api.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

        if post_aid is not None and search_condition is not None:
            raise ValueError(
                log.merge(api.config, [
                    'PostAID',
                    'SearchCondition',
                    i18n.ErrorParameter,
                    i18n.BothInput,
                ]))

        if post_index != 0:
            newest_index = api._get_newest_index(
                data_type.index_type.BBS,
                board=board,
                search_type=search_type,
                search_condition=search_condition)
            check_value.check_index(api.config, 'PostIndex', post_index,
                                    max_value=newest_index)

    if mark_type == data_type.mark_type.UNCONFIRMED:
        # PTT2 has no "unconfirmed post" feature.
        if api.config.host == data_type.host_type.PTT2:
            raise exceptions.HostNotSupport(lib_util.get_current_func_name())

    api._check_board(board, check_moderator=True)

    # Step 1: go to the main menu and enter the board.
    cmd_list = []
    cmd_list.append(command.GoMainMenu)
    cmd_list.append('qs')
    cmd_list.append(board)
    cmd_list.append(command.Enter)
    cmd = ''.join(cmd_list)

    target_list = [
        connect_core.TargetUnit(
            i18n.AnyKeyContinue,
            '任意鍵',
            response=' ',
        ),
        connect_core.TargetUnit(
            [
                '動畫播放中',
            ],
            '互動式動畫播放中',
            response=command.Ctrl_C,
            log_level=log.level.DEBUG),
        connect_core.TargetUnit(
            [
                '進板成功',
            ],
            screens.Target.InBoard,
            break_detect=True,
            log_level=log.level.DEBUG),
    ]
    api.connect_core.send(cmd, target_list)

    # Step 2: move to the post (by AID, or by index within an optional
    # search), then send the key sequence for the requested mark type.
    cmd_list = []
    if post_aid is not None:
        cmd_list.append('#' + post_aid)
        cmd_list.append(command.Enter)
    elif post_index != 0:
        if search_condition is not None:
            if search_type == data_type.post_search_type.KEYWORD:
                cmd_list.append('/')
            elif search_type == data_type.post_search_type.AUTHOR:
                cmd_list.append('a')
            elif search_type == data_type.post_search_type.PUSH:
                cmd_list.append('Z')
            elif search_type == data_type.post_search_type.MARK:
                cmd_list.append('G')
            elif search_type == data_type.post_search_type.MONEY:
                cmd_list.append('A')

            cmd_list.append(search_condition)
            cmd_list.append(command.Enter)

        cmd_list.append(str(post_index))
        cmd_list.append(command.Enter)

    if mark_type == data_type.mark_type.S:
        cmd_list.append('L')
    elif mark_type == data_type.mark_type.D:
        cmd_list.append('t')
    elif mark_type == data_type.mark_type.DeleteD:
        cmd_list.append(command.Ctrl_D)
    elif mark_type == data_type.mark_type.M:
        cmd_list.append('m')
    elif mark_type == data_type.mark_type.UNCONFIRMED:
        cmd_list.append(command.Ctrl_E + 'S')

    cmd = ''.join(cmd_list)

    target_list = [
        connect_core.TargetUnit(
            [i18n.DelAllMarkPost],
            '刪除所有標記',
            response='y' + command.Enter,
            log_level=log.level.INFO),
        connect_core.TargetUnit(
            [
                i18n.Mark,
                i18n.Success,
            ],
            screens.Target.InBoard,
            break_detect=True,
            log_level=log.level.INFO),
    ]
    api.connect_core.send(cmd, target_list)
def push(self, board: str, push_type: int, push_content: str,
         post_aid: str = None, post_index: int = 0) -> None:
    """Validate the arguments, split the content and push each piece.

    The target post is identified by exactly one of ``post_aid`` and
    ``post_index``. ``push_content`` is cut into pieces at newlines or once
    a piece's ``big5-uao`` encoding reaches 33 bytes; empty pieces are
    skipped. Each piece is attempted at most twice, sleeping 5.2 seconds
    after a ``NoFastPush`` error.

    Raises:
        exceptions.Requirelogin: When the session is not logged in.
        ValueError: When ``board`` is empty, or when both or neither of
            ``post_aid`` / ``post_index`` are supplied.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'Board', board)
    check_value.check(self.config, int, 'push_type', push_type,
                      value_class=data_type.push_type)
    check_value.check(self.config, str, 'PushContent', push_content)
    if post_aid is not None:
        check_value.check(self.config, str, 'PostAID', post_aid)
    check_value.check(self.config, int, 'PostIndex', post_index)

    if not len(board) > 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    has_index = post_index != 0
    if has_index and isinstance(post_aid, str):
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.BothInput
            ]))

    if not has_index and post_aid is None:
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.NoInput
            ]))

    if has_index:
        latest = self._get_newest_index(data_type.index_type.BBS, board=board)
        check_value.check_index(self.config, 'PostIndex', post_index, latest)

    self._check_board(board)

    max_push_length = 33
    pieces = []

    # Grow a [chunk_start, chunk_end) window one character at a time until
    # the piece is long enough, ends the content, or ends with a newline.
    chunk_start = 0
    chunk_end = chunk_start + 1
    while chunk_end <= len(push_content):
        chunk = ''
        previous_chunk = None
        while len(chunk.encode('big5-uao', 'replace')) < max_push_length:
            chunk = push_content[chunk_start:chunk_end]

            if (len(chunk.encode('big5-uao', 'replace')) >= max_push_length
                    or push_content.endswith(chunk)
                    or chunk.endswith('\n')
                    or previous_chunk == chunk):
                break

            chunk_end += 1
            previous_chunk = chunk

        pieces.append(chunk.strip())

        chunk_start = chunk_end
        chunk_end = chunk_start + 1

    # Skip pieces that are empty after stripping.
    for push in (piece for piece in pieces if piece):
        log.show_value(self.config, log.level.INFO, i18n.Push, push)

        for _ in range(2):
            try:
                self._push(board, push_type, push, post_aid=post_aid,
                           post_index=post_index)
            except exceptions.NoFastPush:
                log.log(self.config, log.level.INFO, '等待快速推文')
                time.sleep(5.2)
            else:
                break
def crawl_board(
        self,
        crawl_type: int,
        post_handler,
        board: str,
        # BBS mode
        start_index: int = 0,
        end_index: int = 0,
        start_aid: str = None,
        end_aid: str = None,
        search_type: int = 0,
        search_condition: str = None,
        query: bool = False,
        # web mode
        start_page: int = 0,
        end_page: int = 0) -> tuple:
    """Crawl a range of posts from ``board`` and feed each to ``post_handler``.

    In BBS mode the range is given by ``start_index``/``end_index`` or by
    ``start_aid``/``end_aid``, and each post is fetched through
    ``self._get_post`` with one retry. Otherwise the web index pages
    ``start_page``..``end_page`` are fetched over HTTP and parsed.

    Args:
        crawl_type: One of ``data_type.crawl_type``.
        post_handler: Callable invoked with each crawled post.
        board: Board name; must not be empty.
        start_index: First post index (BBS mode), ``0`` when unused.
        end_index: Last post index (BBS mode).
        start_aid: AID of the first post (BBS mode), or ``None``.
        end_aid: AID of the last post (BBS mode), or ``None``.
        search_type: Search type applied while crawling (BBS mode).
        search_condition: Search condition (BBS mode), or ``None``.
        query: Forwarded to ``self._get_post`` (BBS mode).
        start_page: First index page (web mode).
        end_page: Last index page (web mode).

    Returns:
        A ``(error_post_list, del_post_list)`` tuple. In BBS mode the first
        list holds the AID or index of posts that could not be fetched or
        parsed and the second the indexes of deleted posts; in web mode the
        second list holds the titles of deleted posts.

    Raises:
        ValueError: When the arguments are missing, conflicting or out of
            range.
        exceptions.Requirelogin: In BBS mode when not logged in.
        exceptions.HostNotSupport: In web mode on the PTT2 host.
        exceptions.NoSuchBoard: In web mode when a page request fails.
    """
    self._one_thread()

    self.config.log_last_value = None

    check_value.check(self.config, int, 'crawl_type', crawl_type,
                      value_class=data_type.crawl_type)
    check_value.check(self.config, str, 'Board', board)

    if len(board) == 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    if crawl_type == data_type.crawl_type.BBS:
        if not self._login_status:
            raise exceptions.Requirelogin(i18n.Requirelogin)

        check_value.check(self.config, int, 'SearchType', search_type)
        if search_condition is not None:
            check_value.check(self.config, str, 'SearchCondition',
                              search_condition)
        if start_aid is not None:
            check_value.check(self.config, str, 'StartAID', start_aid)
        if end_aid is not None:
            check_value.check(self.config, str, 'EndAID', end_aid)

        aid_given = start_aid is not None or end_aid is not None

        if aid_given and (start_index != 0 or end_index != 0):
            raise ValueError(
                log.merge(
                    self.config,
                    ['AID', 'Index', i18n.ErrorParameter, i18n.BothInput]))

        if aid_given and search_condition is not None:
            raise ValueError(
                log.merge(self.config, [
                    'AID',
                    'SearchCondition',
                    i18n.ErrorParameter,
                    i18n.BothInput
                ]))

        if search_type == data_type.post_search_type.PUSH:
            # A missing condition (None) makes int() raise TypeError; report
            # it as the same parameter error as a non-numeric string.
            try:
                push_threshold = int(search_condition)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    log.merge(self.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ])) from err

            if not (-100 <= push_threshold <= 110):
                raise ValueError(
                    log.merge(self.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

        if start_index != 0:
            newest_index = self._get_newest_index(
                data_type.index_type.BBS,
                board=board,
                search_type=search_type,
                search_condition=search_condition)

            check_value.check_index_range(self.config,
                                          'start_index', start_index,
                                          'end_index', end_index,
                                          max_value=newest_index)
        elif start_aid is not None and end_aid is not None:
            # Translate the AID pair into an index range.
            start_index = self.get_post(board, post_aid=start_aid,
                                        query=True).index
            end_index = self.get_post(board, post_aid=end_aid,
                                      query=True).index

            check_value.check_index_range(self.config,
                                          'start_index', start_index,
                                          'end_index', end_index)
        else:
            raise ValueError(
                log.merge(self.config, [i18n.ErrorParameter, i18n.NoInput]))

        log.show_value(self.config, log.level.DEBUG, 'StartIndex',
                       start_index)
        log.show_value(self.config, log.level.DEBUG, 'EndIndex', end_index)

        error_post_list = []
        del_post_list = []

        show_progress = self.config.log_level == log.level.INFO
        if show_progress:
            PB = progressbar.ProgressBar(
                max_value=end_index - start_index + 1,
                redirect_stdout=True)

        for index in range(start_index, end_index + 1):

            # Each post gets two attempts; errors on the second one propagate.
            for i in range(2):
                need_continue = False
                post = None
                try:
                    post = self._get_post(
                        board,
                        post_index=index,
                        search_type=search_type,
                        search_condition=search_condition,
                        query=query)
                except (exceptions.ParseError,
                        exceptions.UnknownError,
                        exceptions.NoSuchBoard,
                        exceptions.NoMatchTargetError):
                    if i == 1:
                        raise
                    need_continue = True
                except (exceptions.ConnectionClosed,
                        exceptions.UseTooManyResources):
                    if i == 1:
                        raise
                    # Log in again before retrying.
                    log.log(self.config, log.level.INFO,
                            i18n.RestoreConnection)
                    self._login(self._ID, self._Password,
                                self.config.kick_other_login)
                    need_continue = True

                if post is None or not post.pass_format_check:
                    need_continue = True

                if need_continue:
                    log.log(self.config, log.level.DEBUG,
                            'Wait for retry repost')
                    time.sleep(0.1)
                    continue

                break

            if show_progress:
                PB.update(index - start_index)

            if post is None:
                error_post_list.append(index)
                continue
            if not post.pass_format_check:
                if post.aid is not None:
                    error_post_list.append(post.aid)
                else:
                    error_post_list.append(index)
                continue
            if post.delete_status != data_type.post_delete_status.NOT_DELETED:
                del_post_list.append(index)
            post_handler(post)

        if show_progress:
            PB.finish()

        return error_post_list, del_post_list

    else:
        if self.config.host == data_type.host_type.PTT2:
            raise exceptions.HostNotSupport(
                lib_util.get_current_func_name())

        # Web crawler
        # https://www.ptt.cc/bbs/index.html

        # 1. Get the total number of index pages.
        newest_index = self._get_newest_index(data_type.index_type.WEB,
                                              board=board)
        # 2. Check that start_page and end_page lie within 1 ~ newest_index.
        check_value.check_index_range(self.config,
                                      'StartPage', start_page,
                                      'EndPage', end_page,
                                      max_value=newest_index)

        # 3. Parse every entry (deleted posts included) into a
        #    data_type.PostInfo and pass it to post_handler.
        error_post_list = []
        del_post_list = []
        _url = 'https://www.ptt.cc/bbs/'

        show_progress = self.config.log_level == log.level.INFO
        if show_progress:
            PB = progressbar.ProgressBar(
                max_value=end_page - start_page + 1,
                redirect_stdout=True)

        def deleted_post(post_title):
            """Map a listing title to a ``post_delete_status`` value."""
            if post_title.startswith('('):
                if '本文' in post_title:
                    return data_type.post_delete_status.AUTHOR
                elif post_title.startswith('(已被'):
                    return data_type.post_delete_status.MODERATOR
                else:
                    return data_type.post_delete_status.UNKNOWN
            else:
                return data_type.post_delete_status.NOT_DELETED

        # Stop at end_page (not newest_index): the validated range and the
        # progress bar maximum are both defined by start_page..end_page.
        for index in range(start_page, end_page + 1):
            log.show_value(self.config, log.level.DEBUG, 'CurrentPage',
                           index)

            url = _url + board + '/index' + str(index) + '.html'
            r = requests.get(url, cookies={'over18': '1'})
            if r.status_code != requests.codes.ok:
                raise exceptions.NoSuchBoard(self.config, board)
            soup = BeautifulSoup(r.text, 'html.parser')

            for div in soup.select('div.r-ent'):
                web = div.select('div.title a')
                post = {
                    'author': div.select('div.author')[0].text,
                    'title': div.select('div.title')[0].text.strip(
                        '\n').strip(),
                    'web': web[0].get('href') if web else ''
                }
                if post['title'].startswith('('):
                    del_post_list.append(post['title'])
                    # The author is taken from the bracketed part of the
                    # deletion title.
                    if post['title'].startswith('(本文'):
                        if '[' in post['title']:
                            post['author'] = post['title'].split(
                                '[')[1].split(']')[0]
                        else:
                            post['author'] = post['title'].split(
                                '<')[1].split('>')[0]
                    else:
                        post['author'] = post['title'].split(
                            '<')[1].split('>')[0]

                post = data_type.PostInfo(
                    board=board,
                    author=post['author'],
                    title=post['title'],
                    web_url='https://www.ptt.cc' + post['web'],
                    delete_status=deleted_post(post['title']))
                post_handler(post)

            if show_progress:
                PB.update(index - start_page)

        log.show_value(self.config, log.level.DEBUG, 'DelPostList',
                       del_post_list)

        if show_progress:
            PB.finish()

        return error_post_list, del_post_list
def get_post(self, board: str, post_aid: str = None, post_index: int = 0,
             search_type: int = 0, search_condition: str = None,
             query: bool = False) -> data_type.PostInfo:
    """Validate the arguments and fetch a single post, retrying once.

    The post is identified by exactly one of ``post_aid`` and
    ``post_index``; ``post_index`` may be combined with a search.

    Args:
        board: Board name; must not be empty.
        post_aid: Post AID, or ``None``.
        post_index: 1-based post index, or ``0`` when unused.
        search_type: One of ``data_type.post_search_type``.
        search_condition: Search condition, or ``None``.
        query: Forwarded to ``self._get_post``.

    Returns:
        The result of the last ``self._get_post`` attempt; it may be
        ``None`` or fail its format check if both attempts did.

    Raises:
        exceptions.Requirelogin: When the session is not logged in.
        ValueError: When the arguments are missing, conflicting or out of
            range.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'Board', board)
    if post_aid is not None:
        check_value.check(self.config, str, 'PostAID', post_aid)
    check_value.check(self.config, int, 'PostIndex', post_index)
    check_value.check(self.config, int, 'SearchType', search_type,
                      value_class=data_type.post_search_type)
    if search_condition is not None:
        check_value.check(self.config, str, 'SearchCondition',
                          search_condition)

    if len(board) == 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    if post_index != 0 and isinstance(post_aid, str):
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.BothInput
            ]))

    if post_index == 0 and post_aid is None:
        raise ValueError(
            log.merge(self.config,
                      ['PostIndex', 'PostAID', i18n.ErrorParameter]))

    if search_condition is not None and search_type == 0:
        raise ValueError(
            log.merge(self.config, [
                'SearchType',
                i18n.ErrorParameter,
            ]))

    if search_type == data_type.post_search_type.PUSH:
        # A missing condition (None) makes int() raise TypeError; report it
        # as the same parameter error as a non-numeric string.
        try:
            push_threshold = int(search_condition)
        except (TypeError, ValueError) as err:
            raise ValueError(
                log.merge(self.config, [
                    'SearchCondition',
                    i18n.ErrorParameter,
                ])) from err

        if not (-100 <= push_threshold <= 110):
            raise ValueError(
                log.merge(self.config, [
                    'SearchCondition',
                    i18n.ErrorParameter,
                ]))

    if post_aid is not None and search_condition is not None:
        raise ValueError(
            log.merge(self.config, [
                'PostAID',
                'SearchCondition',
                i18n.ErrorParameter,
                i18n.BothInput,
            ]))

    if post_index != 0:
        newest_index = self._get_newest_index(
            data_type.index_type.BBS,
            board=board,
            search_type=search_type,
            search_condition=search_condition)

        if post_index < 1 or newest_index < post_index:
            raise ValueError(
                log.merge(self.config, [
                    'PostIndex',
                    i18n.ErrorParameter,
                    i18n.OutOfRange,
                ]))

    self._check_board(board)

    # Two attempts; errors raised on the second attempt propagate.
    for i in range(2):
        need_continue = False
        post = None
        try:
            post = self._get_post(board, post_aid, post_index, search_type,
                                  search_condition, query)
        except (exceptions.ParseError,
                exceptions.UnknownError,
                exceptions.NoSuchBoard,
                exceptions.NoMatchTargetError):
            if i == 1:
                raise
            need_continue = True

        if post is None or not post.pass_format_check:
            need_continue = True

        if need_continue:
            log.log(self.config, log.level.DEBUG, 'Wait for retry repost')
            time.sleep(0.1)
            continue

        break
    return post