class TestCna(unittest.TestCase):
    """Search tests for the CNA (Central News Agency) channel."""

    def setUp(self):
        """Store the search keyword and build the ``cna`` searcher (``limit=10``)."""
        self.keyword = '上吊'
        self.nsearch = NewsSearch('cna', limit=10)

    def test_01_filter_title(self):
        """Title-only search: every returned title must contain '上吊'."""
        query = self.nsearch.by_keyword(self.keyword, title_only=True)
        for entry in query.to_dict_list():
            if '上吊' in entry['title']:
                continue
            # First title without the keyword fails the test immediately.
            self.fail('標題必須含有 "上吊"')

    def test_02_search_and_soup(self):
        """Search then parse: no parsed article may report ``None`` contents."""
        query = self.nsearch.by_keyword(self.keyword)
        for article in query.to_soup_list():
            if article.contents() is not None:
                continue
            self.fail('內文不可為 None')
class TestAppleDaily(unittest.TestCase):
    """Search tests for the Apple Daily channel."""

    def setUp(self):
        """Store the search keyword and build the ``appledaily`` searcher (``limit=10``)."""
        self.keyword = '上吊'
        self.nsearch = NewsSearch('appledaily', limit=10)

    def test_01_filter_title(self):
        """Title-only search: every returned title must contain '上吊'."""
        query = self.nsearch.by_keyword(self.keyword, title_only=True)
        for entry in query.to_dict_list():
            if '上吊' in entry['title']:
                continue
            # First title without the keyword fails the test immediately.
            self.fail('標題必須含有 "上吊"')

    def test_02_search_and_soup(self):
        """Search then parse: contents must not be ``None``.

        Articles hosted under ``https://home.appledaily.com.tw`` are exempt
        from the check.
        """
        query = self.nsearch.by_keyword(self.keyword)
        for article in query.to_soup_list():
            if article.contents() is not None:
                continue
            # home.appledaily.com.tw has a problematic SSL certificate, so
            # failures caused by that host are ignored.
            if article.path.startswith('https://home.appledaily.com.tw'):
                continue
            self.fail('內文不可為 None, URL={}'.format(article.path))
class TestEttoday(unittest.TestCase):
    """Search tests for the ETtoday News Cloud channel."""

    def setUp(self):
        """Store the keyword and build the ``ettoday`` searcher.

        The searcher is created with ``limit=10`` and ``proxy_first=True``.
        """
        self.keyword = '上吊'
        self.nsearch = NewsSearch('ettoday', limit=10, proxy_first=True)

    def test_01_filter_title(self):
        """Title-only search: every returned title must contain '上吊'."""
        query = self.nsearch.by_keyword(self.keyword, title_only=True)
        for entry in query.to_dict_list():
            if '上吊' in entry['title']:
                continue
            # First title without the keyword fails the test immediately.
            self.fail('標題必須含有 "上吊"')

    def test_02_search_and_soup(self):
        """Search then parse: no parsed article may report ``None`` contents."""
        query = self.nsearch.by_keyword(self.keyword)
        for article in query.to_soup_list():
            if article.contents() is not None:
                continue
            # Include the article URL so the failing page can be inspected.
            self.fail('內文不可為 None, URL={}'.format(article.path))
def compare_keyword(keyword):
    """Print how often ``keyword`` appeared in each outlet's titles last month.

    The search window is the whole previous calendar month (first day through
    last day, inclusive), derived from the current local date. For every
    channel in the media table a title-only search is run with that window and
    the number of returned entries is printed.

    Args:
        keyword: Search term passed to ``NewsSearch.by_keyword``.
    """
    # Imported here so the function does not rely on the module-level import
    # list also exposing ``timedelta``.
    from datetime import timedelta

    print('比較上個月 "{}" 在各媒體標題出現次數'.format(keyword))

    # Step back one day from the first of the current month to reach the last
    # day of the previous month. Pure calendar arithmetic keeps this correct
    # in January (previous month is December of the prior year) and avoids
    # mixing UTC-based timestamp flooring with local day numbers.
    today = datetime.now().date()
    last_day_prev = today.replace(day=1) - timedelta(days=1)
    first_day_prev = last_day_prev.replace(day=1)
    beg_date = first_day_prev.strftime('%Y-%m-%d')
    end_date = last_day_prev.strftime('%Y-%m-%d')
    print('時間區間: {} ~ {}'.format(beg_date, end_date))

    # Channel identifier -> display name printed in front of the count.
    media = {
        'appledaily': ' 蘋果',
        'cna': '中央社',
        'ettoday': ' 東森',
        'ltn': ' 自由',
        'setn': ' 三立',
        'udn': ' 聯合',
    }

    for (channel, name) in media.items():
        nsearch = NewsSearch(channel, beg_date=beg_date, end_date=end_date, limit=999)
        results = nsearch.by_keyword(keyword, title_only=True).to_dict_list()
        msg = '{}: {}'.format(name, len(results))
        # Flush per channel so each count shows up as soon as its search ends.
        print(msg, flush=True)
def search_and_compare_performance(keyword):
    """Time keyword searches on every channel and print a Markdown summary.

    Each channel is searched three times with ``limit=100``. For every run the
    seconds per page, seconds per result, page count and total elapsed time
    are printed. Afterwards the seconds-per-page samples of all channels are
    printed as a Markdown table with one column per run.

    A run that reports zero pages or returns zero results prints ``nan`` for
    the corresponding average instead of raising ``ZeroDivisionError``.

    Args:
        keyword: Search term passed to ``NewsSearch.by_keyword``.
    """
    print('測試各家新聞台的搜尋效能')

    # Loop-invariant values hoisted out of the benchmark loops.
    msg = '{:03d}: {:.3f} 秒/頁, {:.3f} 秒/筆, 共 {} 頁, 總耗時: {:.3f} 秒'
    undefined = float('nan')

    summary = {}
    for channel in ['appledaily', 'cna', 'ettoday', 'ltn', 'setn', 'udn']:
        print()
        print(channel)
        print('-' * 60)
        summary[channel] = []
        for repeat in range(3):
            nsearch = NewsSearch(channel, limit=100)
            nsearch.by_keyword(keyword)
            results = nsearch.to_dict_list()
            total = len(results)

            # Read the timing figures once so every derived number of this
            # run is computed from the same values.
            elapsed = nsearch.elapsed()
            pages = nsearch.pages()

            # An empty search has no meaningful per-page / per-result time.
            tpp = elapsed / pages if pages else undefined
            tpr = elapsed / total if total else undefined

            summary[channel].append(tpp)
            print(msg.format(repeat, tpp, tpr, pages, elapsed))
        print('-' * 60)

    print()
    print('Markdown 摘要表:')
    print()
    print(' | 1st | 2nd | 3rd')
    print('---- | ---- | ---- | ----')
    for (channel, samples) in summary.items():
        # One table row per channel: name followed by its per-page samples.
        cells = ''.join(' | {:.3f}'.format(sample) for sample in samples)
        print(channel + cells)
    print()
def search_and_list(keyword, channel):
    """Search one channel and print title, date and link of each result.

    Args:
        keyword: Search term passed to ``NewsSearch.by_keyword``.
        channel: Channel identifier passed to ``NewsSearch``.

    A ``ValueError`` raised while printing an entry is logged (exception type
    name, then the exception itself) and the loop continues with the next one.
    """
    print('測試搜尋')
    searcher = NewsSearch(channel, limit=10)
    found = searcher.by_keyword(keyword)
    entries = found.to_dict_list()
    logger = get_logger()

    # Detail lines printed after the numbered title line: (template, key).
    detail_lines = ((' 日期: {}', 'date'), (' 連結: {}', 'link'))

    for index, entry in enumerate(entries):
        try:
            print('{:03d}: {}'.format(index, entry['title']))
            for template, key in detail_lines:
                print(template.format(entry[key]))
        except ValueError as err:
            logger.error('例外類型: %s', err.__class__.__name__)
            logger.error(err)
def search_and_soup(keyword, channel):
    """Search one channel, parse each hit and print a short summary of it.

    A progress message is shown while searching and overwritten (carriage
    return plus padding) once the parsed list is available. For each article
    the path, author, date, title and ``contents(30)`` are printed.

    Args:
        keyword: Search term passed to ``NewsSearch.by_keyword``.
        channel: Channel identifier passed to ``NewsSearch``.

    A ``ValueError`` raised while printing an article is logged (exception
    type name, then the exception itself) and the loop moves on.
    """
    print('測試搜尋與分解, 搜尋中 ...', end='', flush=True)
    logger = get_logger()
    searcher = NewsSearch(channel, limit=10)
    found = searcher.by_keyword(keyword)
    articles = found.to_soup_list()
    # Return to the start of the line and pad to wipe the progress text.
    print('\r測試搜尋與分解' + ' ' * 20, flush=True)

    for index, article in enumerate(articles):
        try:
            print('{:03d}: {}'.format(index, article.path))
            author = article.author()
            published = article.date()
            print(' 記者: {} / 日期: {}'.format(author, published))
            headline = article.title()
            print(' 標題: {}'.format(headline))
            preview = article.contents(30)
            print(' {} ...'.format(preview), flush=True)
        except ValueError as err:
            logger.error('例外類型: %s', err.__class__.__name__)
            logger.error(err)
def search_and_soup(keyword, channel):
    """Search one channel, parse each hit and print a short summary of it.

    For each article the path, author, date, title and the first 30 items of
    ``contents()`` (taken by slicing) are printed.

    Args:
        keyword: Search term passed to ``NewsSearch.by_keyword``.
        channel: Channel identifier passed to ``NewsSearch``.

    Any ``Exception`` raised while printing an article is logged (exception
    type name, then the exception itself) and the loop moves on.
    """
    print('測試搜尋與分解')
    logger = get_logger()
    searcher = NewsSearch(channel, limit=10)
    found = searcher.by_keyword(keyword)
    articles = found.to_soup_list()

    for index, article in enumerate(articles):
        try:
            print('{:03d}: {}'.format(index, article.path))
            author = article.author()
            published = article.date()
            print(' 記者: {} / 日期: {}'.format(author, published))
            headline = article.title()
            print(' 標題: {}'.format(headline))
            preview = article.contents()[:30]
            print(' {} ...'.format(preview))
        except Exception as err:
            logger.error('例外類型: %s', err.__class__.__name__)
            logger.error(err)