def getDaumNews():
    logger.info("----------getDaumNews()----------")
    source = requests.get(target_url['news_daum']).text
    soup = BeautifulSoup(source, 'html.parser')
    elem_list_title = soup.select("div.cont_thumb .tit_thumb a")
    elem_list_desc = soup.select("div.cont_thumb .desc_thumb span")
    titles = []
    descs = []
    datas = []
    url = 'https://search.daum.net/search?w=news&nil_search=btn&DA=NTB&enc=utf8&cluster=y&cluster_page=1&q='
    # Keep each title and its link together in one tab-joined string so the
    # headline can be split back out below.
    for v in elem_list_title:
        titles.append(v.text + '\t' + v.attrs['href'])
    for v in elem_list_desc:
        descs.append(v.text.strip())
    # Keep titles and descriptions aligned to the same length (fn helper,
    # sketched below).
    fn.aryLenSync(titles, descs)
    for i, title in enumerate(titles):
        headline = title.split('\t')[0]
        data = '%s\t%s\t%s\t%s\t%s\n' % (
            target_code['news_daum'], createTime, fn.getStrNo(i + 1),
            fn.getConvData(headline), url + fn.getEncodeUrl(headline))
        datas.append(data)
        logger.debug(data)
    return datas
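# None of the module-level names used above (logger, target_url, target_code,
# createTime, fn) are defined in this section. A minimal sketch of the setup
# they imply; the URL/code values and the timestamp format are assumptions,
# not the project's actual configuration:
#
#     import json
#     import logging
#
#     import requests
#     from bs4 import BeautifulSoup
#     from datetime import datetime
#
#     import fn  # project-local helper module, sketched below
#
#     logger = logging.getLogger(__name__)
#     target_url = {}   # per-source page URLs (values not shown here)
#     target_code = {}  # per-source record codes (values not shown here)
#     createTime = datetime.now().strftime('%Y%m%d%H%M%S')  # assumed format
#
# The fn helpers are likewise not shown. A hypothetical sketch of fn.py,
# assuming their only jobs are zero-padded numbering, record-safe text,
# URL encoding, and list-length syncing (the real implementations may differ):
#
#     import urllib.parse
#
#     def getStrNo(n):
#         # Zero-pad the rank so records sort lexicographically.
#         return '%02d' % n
#
#     def getConvData(text):
#         # Strip whitespace and flatten tabs/newlines so a value cannot
#         # break the tab-separated record format.
#         return text.strip().replace('\t', ' ').replace('\n', ' ')
#
#     def getEncodeUrl(text):
#         # Percent-encode the keyword for use in a search query string.
#         return urllib.parse.quote(text)
#
#     def aryLenSync(a, b):
#         # Trim the longer list so both lists end up the same length.
#         del a[len(b):]
#         del b[len(a):]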
def getDaumKeyword():
    logger.info("----------getDaumKeyword()----------")
    logger.debug("get source data")
    source1 = requests.get(target_url['keyword_daum1']).text
    source2 = requests.get(target_url['keyword_daum2']).text
    soup1 = BeautifulSoup(source1, 'html.parser')
    soup2 = BeautifulSoup(source2, 'html.parser')
    elem_list1 = soup1.select(".list_mini .rank_cont .link_issue")
    elem_list2 = soup2.select(
        "#daumWrap #daumContent #mAside #ratNewsCollDetail .keyword_rank .link_txt")
    searchword_list = []
    for v in elem_list1:
        searchword_list.append(fn.getConvData(v.get_text()))
    for v in elem_list2:
        searchword_list.append(fn.getConvData(v.get_text()))
    logger.debug("parsing data")
    datas = []
    code = ''
    num = ''
    url = 'https://search.daum.net/search?w=tot&q='
    for i, v in enumerate(searchword_list):
        # Each block of ten keywords gets its own target code, with the rank
        # renumbered 1-10 inside the block.
        if 0 < i + 1 <= 10:
            code = target_code['keyword_daum1']
            num = fn.getStrNo(i + 1)
        elif 10 < i + 1 <= 20:
            code = target_code['keyword_daum2']
            num = fn.getStrNo(i + 1 - 10)
        elif 20 < i + 1 <= 30:
            code = target_code['keyword_daum3']
            num = fn.getStrNo(i + 1 - 20)
        elif 30 < i + 1 <= 40:
            code = target_code['keyword_daum4']
            num = fn.getStrNo(i + 1 - 30)
        data = "%s\t%s\t%s\t%s\t%s\t\n" % (code, createTime, num, v,
                                           url + fn.getEncodeUrl(v))
        datas.append(data)
        logger.debug(data)
    logger.debug('parsing succeeded')
    return datas
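# The if/elif chain above maps each block of ten ranks to its own target
# code. For reference, a table-driven equivalent for the first 40 entries
# (daumRankToCodeNum is a hypothetical name; the dict keys are the same
# ones used above):
def daumRankToCodeNum(i):
    # i is the zero-based index into searchword_list (0..39).
    keys = ('keyword_daum1', 'keyword_daum2', 'keyword_daum3', 'keyword_daum4')
    return target_code[keys[i // 10]], fn.getStrNo(i % 10 + 1)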
def getNaverKeyword():
    logger.info("----------getNaverKeyword()----------")
    logger.debug("get source data")
    source1 = requests.get(target_url['keyword_naver1']).text
    jsonObject = json.loads(source1)
    keyword_items = jsonObject['data']
    source2 = requests.get(target_url['keyword_naver2']).text
    soup2 = BeautifulSoup(source2, 'html.parser')
    elem_list2 = soup2.select(".realtime_srch .lst_realtime_srch li .tit")
    searchword_list = []
    for item in keyword_items:
        searchword_list.append(fn.getConvData(item['keyword']))
    for i, v in enumerate(elem_list2):
        if i < 40:
            searchword_list.append(fn.getConvData(v.get_text()))
    logger.debug("parsing data")
    datas = []
    code = ''
    num = ''
    url = 'https://search.naver.com/search.naver?where=nexearch&query='
    for i, v in enumerate(searchword_list):
        # The first 20 keywords come from the JSON feed; the next two blocks
        # of ten come from the scraped page.
        if 0 < i + 1 <= 20:
            code = target_code['keyword_naver1']
            num = fn.getStrNo(i + 1)
        elif 20 < i + 1 <= 30:
            code = target_code['keyword_naver2']
            num = fn.getStrNo(i + 1 - 20)
        elif 30 < i + 1 <= 40:
            code = target_code['keyword_naver3']
            num = fn.getStrNo(i + 1 - 30)
        data = "%s\t%s\t%s\t%s\t%s\t\n" % (code, createTime, num, v,
                                           url + fn.getEncodeUrl(v))
        if i + 1 <= 40:  # cap output at the first 40 keywords
            datas.append(data)
            logger.debug(data)
    logger.debug('parsing succeeded')
    return datas
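# requests can decode a JSON body directly; the json.loads() call above
# could equivalently be written as:
#
#     jsonObject = requests.get(target_url['keyword_naver1']).json()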
def getYoutubeKeyword():
    logger.info("----------getYoutubeKeyword()----------")
    logger.debug("get source data")
    source = requests.get(target_url['keyword_youtube']).text
    soup = BeautifulSoup(source, 'html.parser')
    elem_list = soup.select("h3.yt-lockup-title > a")
    logger.debug("parsing data")
    datas = []
    url = 'https://www.youtube.com'
    for i, v in enumerate(elem_list):
        # Some anchors carry no title attribute; skip those.
        if 'title' in v.attrs:
            data = "%s\t%s\t%s\t%s\t%s\n" % (
                target_code['keyword_youtube'], createTime, fn.getStrNo(i + 1),
                fn.getConvData(v.attrs['title']),
                url + v.attrs['href'])  # hrefs are relative; prefix the domain
            datas.append(data)
            logger.debug(data)
    logger.debug('parsing succeeded')
    return datas
def getNaverNews():
    logger.info("----------getNaverNews()----------")
    source = requests.get(target_url['news_naver']).text
    soup = BeautifulSoup(source, 'html.parser')
    elem_list = soup.select(".ranking_section ol li dl a")
    datas = []
    category = ''
    num = ''
    url = 'https://search.naver.com/search.naver?where=news&sm=tab_jum&query='
    for i, v in enumerate(elem_list):
        content = v.attrs['title']
        # The ranking page lists six sections of five articles each:
        # politics, economy, society, life/culture, world, IT/science.
        if i + 1 <= 5:
            category = '정치'
            num = i + 1
        elif 5 < i + 1 <= 10:
            category = '경제'
            num = i + 1 - 5
        elif 10 < i + 1 <= 15:
            category = '사회'
            num = i + 1 - 10
        elif 15 < i + 1 <= 20:
            category = '생활/문화'
            num = i + 1 - 15
        elif 20 < i + 1 <= 25:
            category = '세계'
            num = i + 1 - 20
        elif 25 < i + 1 <= 30:
            category = 'IT/과학'
            num = i + 1 - 25
        data = '%s\t%s\t%s\t(%s)%s\t%s\n' % (
            target_code['news_naver'], createTime, fn.getStrNo(num), category,
            fn.getConvData(content), url + fn.getEncodeUrl(content))
        datas.append(data)
        logger.debug(data)
    return datas
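# How these collectors are driven is not shown in this section. A minimal
# usage sketch, assuming the records are simply written to one tab-separated
# text file (collectAll and the file name are assumptions, not the project's
# actual entry point or output path):
def collectAll():
    datas = []
    datas += getDaumNews()
    datas += getDaumKeyword()
    datas += getNaverKeyword()
    datas += getYoutubeKeyword()
    datas += getNaverNews()
    with open('trend_%s.txt' % createTime, 'w', encoding='utf-8') as f:
        f.writelines(datas)  # each record already ends with '\n'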