Example #1
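All five examples below appear to come from the same scraper module and lean on module-level state that the snippets never define: the target_url and target_code dictionaries, a createTime stamp, a logger, and an fn helper module. A minimal sketch of that assumed context follows; the dictionary keys are inferred from the snippets, while the URL values, the timestamp format, and the helper signatures are placeholders rather than the original source.

import json
import logging
from datetime import datetime

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# Placeholder timestamp; the format the original module uses is unknown.
createTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Keys inferred from the snippets below; URL and code values are placeholders.
target_url = {key: 'https://...' for key in (
    'news_daum', 'news_naver', 'keyword_daum1', 'keyword_daum2',
    'keyword_naver1', 'keyword_naver2', 'keyword_youtube')}
target_code = {key: key.upper() for key in (
    'news_daum', 'news_naver', 'keyword_daum1', 'keyword_daum2',
    'keyword_daum3', 'keyword_daum4', 'keyword_naver1', 'keyword_naver2',
    'keyword_naver3', 'keyword_youtube')}

# fn is assumed to be a small utility module providing at least:
#   fn.getStrNo(n)       zero-padded rank string
#   fn.getConvData(s)    cleaned/escaped display text
#   fn.getEncodeUrl(s)   URL-encoded query string
#   fn.aryLenSync(a, b)  equalizes the lengths of two lists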
def getDaumNews():
    logger.info("----------getDaumNews()----------")
    source = requests.get(target_url['news_daum']).text
    soup = BeautifulSoup(source, 'html.parser')
    elem_list_title = soup.select("div.cont_thumb .tit_thumb a")
    elem_list_desc = soup.select("div.cont_thumb .desc_thumb span")

    titles = []
    descs = []
    datas = []
    url = 'https://search.daum.net/search?w=news&nil_search=btn&DA=NTB&enc=utf8&cluster=y&cluster_page=1&q='
    for v in elem_list_title:
        titles.append(v.text + '\t' + v.attrs['href'])

    for v in elem_list_desc:
        descs.append(v.text.strip())
    # fn.aryLenSync presumably equalizes the two lists' lengths; note that
    # descs is collected but never used in the rows built below.
    fn.aryLenSync(titles, descs)
    for i, v in enumerate(titles):
        title_text = v.split('\t')[0]
        data = '%s\t%s\t%s\t%s\t%s\n' % (
            target_code['news_daum'], createTime, fn.getStrNo(i + 1),
            fn.getConvData(title_text),
            url + fn.getEncodeUrl(title_text))
        datas.append(data)
        logger.debug(data)

    return datas
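Each of these functions returns its rows as a list of tab-separated, newline-terminated strings, so a caller only needs to write them out. A hypothetical usage (the file name is an assumption):

rows = getDaumNews()
with open('daum_news.tsv', 'w', encoding='utf-8') as f:
    f.writelines(rows)   # each row already ends with '\n'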
Example #2
def getDaumKeyword():

    logger.info("----------getDaumKeyword()----------")
    logger.debug("get Source Datas")
    source1 = requests.get(target_url['keyword_daum1']).text
    source2 = requests.get(target_url['keyword_daum2']).text
    soup1 = BeautifulSoup(source1, 'html.parser')
    soup2 = BeautifulSoup(source2, 'html.parser')
    elem_list1 = soup1.select(".list_mini .rank_cont .link_issue")
    elem_list2 = soup2.select(
        "#daumWrap #daumContent #mAside #ratNewsCollDetail .keyword_rank .link_txt"
    )

    searchword_list = []
    for v in elem_list1:
        searchword_list.append(fn.getConvData(v.get_text()))
    for v in elem_list2:
        searchword_list.append(fn.getConvData(v.get_text()))

    logger.debug("parsing Datas")
    datas = []
    code = ''
    num = ''
    url = 'https://search.daum.net/search?w=tot&q='
    for i, v in enumerate(searchword_list):
        rank = i + 1
        if rank <= 10:
            code = target_code['keyword_daum1']
            num = fn.getStrNo(rank)
        elif rank <= 20:
            code = target_code['keyword_daum2']
            num = fn.getStrNo(rank - 10)
        elif rank <= 30:
            code = target_code['keyword_daum3']
            num = fn.getStrNo(rank - 20)
        elif rank <= 40:
            code = target_code['keyword_daum4']
            num = fn.getStrNo(rank - 30)
        data = "%s\t%s\t%s\t%s\t%s\t\n" % (code, createTime, num, v,
                                           url + fn.getEncodeUrl(v))
        datas.append(data)
        logger.debug(data)

    logger.debug('parsing succeeded')
    return datas
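The if/elif chain above walks uniform buckets of ten: ranks 1-10 map to keyword_daum1, 11-20 to keyword_daum2, and so on. Because the buckets are uniform, the same mapping can be computed arithmetically; a sketch, assuming the list never exceeds 40 entries:

bucket = i // 10 + 1                          # 1..4 for indices 0..39
code = target_code['keyword_daum%d' % bucket]
num = fn.getStrNo(i % 10 + 1)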
Example #3
def getNaverKeyword():
    logger.info("----------getNaverKeyword()----------")
    logger.debug("get Source Datas")
    source1 = requests.get(target_url['keyword_naver1']).text
    jsonObject = json.loads(source1)  # source1 is already a str
    keyword_items = jsonObject['data']

    source2 = requests.get(target_url['keyword_naver2']).text
    soup2 = BeautifulSoup(source2, 'html.parser')
    elem_list2 = soup2.select(".realtime_srch .lst_realtime_srch li .tit")

    searchword_list = []
    for v in keyword_items:
        searchword_list.append(fn.getConvData(v['keyword']))
    for i, v in enumerate(elem_list2):
        if i < 40:
            searchword_list.append(fn.getConvData(v.get_text()))

    logger.debug("parsing Datas")
    datas = []
    code = ''
    num = ''
    url = 'https://search.naver.com/search.naver?where=nexearch&query='
    for i, v in enumerate(searchword_list):
        rank = i + 1
        if rank <= 20:
            code = target_code['keyword_naver1']
            num = fn.getStrNo(rank)
        elif rank <= 30:
            code = target_code['keyword_naver2']
            num = fn.getStrNo(rank - 20)
        elif rank <= 40:
            code = target_code['keyword_naver3']
            num = fn.getStrNo(rank - 30)

        data = "%s\t%s\t%s\t%s\t%s\t\n" % (code, createTime, num, v,
                                           url + fn.getEncodeUrl(v))
        if rank <= 40:
            datas.append(data)
            logger.debug(data)

    logger.debug('parsing succeeded')
    return datas
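Unlike the other sources, keyword_naver1 serves JSON rather than HTML, and the code above relies only on a top-level 'data' list whose items carry a 'keyword' field. A self-contained sketch of that assumed shape:

import json

sample = json.loads('{"data": [{"keyword": "example"}]}')
keywords = [item['keyword'] for item in sample['data']]   # ['example']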
Example #4
def getYoutubeKeyword():
    logger.info("----------getYoutubeKeyword()----------")

    logger.debug("get Source Datas")
    source = requests.get(target_url['keyword_youtube']).text
    soup = BeautifulSoup(source, 'html.parser')
    elem_list = soup.select("h3.yt-lockup-title > a ")

    logger.debug("parsing Datas")
    datas = []
    url = 'https://www.youtube.com'
    for i, v in enumerate(elem_list):
        if 'title' in v.attrs:
            data = "%s\t%s\t%s\t%s\t%s\n" % (
                target_code['keyword_youtube'], createTime, fn.getStrNo(i + 1),
                fn.getConvData(v.attrs['title']), url + v.attrs['href'])
            datas.append(data)
            logger.debug(data)
    logger.debug('parsing succeeded')
    return datas
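The 'title' check above skips anchors that lack a title attribute, such as thumbnail-only links. The same selection logic, demonstrated on a static snippet:

from bs4 import BeautifulSoup

html = '<h3 class="yt-lockup-title"><a href="/watch?v=x" title="Demo">Demo</a></h3>'
doc = BeautifulSoup(html, 'html.parser')
for a in doc.select('h3.yt-lockup-title > a'):
    if 'title' in a.attrs:
        print(a.attrs['title'], a.attrs['href'])   # Demo /watch?v=x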
Example #5
def getNaverNews():
    logger.info("----------getNaverNews()----------")
    source = requests.get(target_url['news_naver']).text
    soup = BeautifulSoup(source, 'html.parser')
    elem_list = soup.select(".ranking_section ol li dl a")

    datas = []
    category = ''  # renamed from `type`, which shadows the built-in
    num = ''
    url = 'https://search.naver.com/search.naver?where=news&sm=tab_jum&query='
    for i, v in enumerate(elem_list):
        content = v.attrs['title']
        rank = i + 1
        # Six ranking sections of five items each: politics, economy,
        # society, life/culture, world, IT/science.
        if rank <= 5:
            category = '정치'
            num = rank
        elif rank <= 10:
            category = '경제'
            num = rank - 5
        elif rank <= 15:
            category = '사회'
            num = rank - 10
        elif rank <= 20:
            category = '생활/문화'
            num = rank - 15
        elif rank <= 25:
            category = '세계'
            num = rank - 20
        elif rank <= 30:
            category = 'IT/과학'
            num = rank - 25

        data = '%s\t%s\t%s\t(%s)%s\t%s\n' % (
            target_code['news_naver'], createTime, fn.getStrNo(num), category,
            fn.getConvData(content), url + fn.getEncodeUrl(content))
        datas.append(data)
        logger.debug(data)

    return datas
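All five functions share the same return contract, a list of tab-separated lines, so a driver can treat them uniformly. A hypothetical example (the function name and the error handling are assumptions, not part of the source):

def collectAll():
    rows = []
    for scraper in (getDaumNews, getDaumKeyword, getNaverKeyword,
                    getYoutubeKeyword, getNaverNews):
        try:
            rows.extend(scraper())
        except Exception:
            logger.exception('scraper %s failed', scraper.__name__)
    return rows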