示例#1
0
文件: tv.py 项目: toz/files_move
 def get_lxml_by_url(url):
     """Fetch *url* with the shared Daum session and parse the body with lxml.

     The request is sent with the project's Daum headers and the cookies
     supplied by ``SystemLogicSite.get_daum_cookies()``.

     Returns the element produced by ``lxml.html.fromstring`` for the
     response content, or ``None`` when anything raises (the exception and
     its traceback are logged).
     """
     try:
         # Imported lazily, inside the error boundary, as in the rest of this file.
         from framework.common.daum import headers, session
         from system.logic_site import SystemLogicSite
         response = session.get(url, headers=headers, cookies=SystemLogicSite.get_daum_cookies())
         return lxml.html.fromstring(response.content)
     except Exception as err:
         logger.error('Exception:%s', err)
         logger.error(traceback.format_exc())
     return None
示例#2
0
文件: tv.py 项目: toz/files_move
 def get_html(url):
     """Fetch *url* with the shared Daum session and return the response body.

     The request is sent with the project's Daum headers and the cookies
     supplied by ``SystemLogicSite.get_daum_cookies()``.

     Returns the response's ``content`` attribute unchanged, or ``None`` when
     anything raises (the exception and its traceback are logged).
     """
     try:
         # Imported lazily, inside the error boundary, as in the rest of this file.
         from framework.common.daum import headers, session
         from system.logic_site import SystemLogicSite
         response = session.get(url, headers=headers, cookies=SystemLogicSite.get_daum_cookies())
         return response.content
     except Exception as err:
         logger.error('Exception:%s', err)
         logger.error(traceback.format_exc())
     return None
示例#3
0
    def daum_get_ratings_list(keyword):
        """Scrape the programme list shown by a Daum search for *keyword*.

        The keyword is UTF-8 encoded and URL-quoted into a Daum total-search
        URL; every ``<li>`` under ``ol.list_program.item_cont`` in the result
        becomes one dict.

        Each dict carries, in this order:
            title, air_time, provider -- first text node of the matching tag
                (a missing node raises and aborts the whole scrape).
            image -- first ``img/@src``, or a fixed placeholder URL.
            scheduled, ratings -- first matching text node, or the empty
                xpath result list when nothing matches.

        Returns the list of dicts, or ``None`` when anything raises (the
        exception and its traceback are logged).
        """
        # The original author's notes listed example keywords here: Korean
        # search terms for weekday drama slots and per-weekday variety shows.
        placeholder_image = 'http://www.okbible.com/data/skin/okbible_1/images/common/noimage.gif'

        def first_or_unchanged(nodes):
            # Collapse a non-empty xpath result to its first entry; hand an
            # empty result back as-is.
            return nodes[0] if len(nodes) else nodes

        try:
            from framework.common.daum import headers, session
            from system.logic_site import SystemLogicSite
            query = py_urllib.quote(keyword.encode('utf8'))
            url = 'https://search.daum.net/search?w=tot&q=%s' % query
            response = session.get(url,
                                   headers=headers,
                                   cookies=SystemLogicSite.get_daum_cookies())
            root = lxml.html.fromstring(response.content)

            programs = []
            for entry in root.xpath('//ol[@class="list_program item_cont"]/li'):
                title = entry.xpath('./div/strong/a/text()')[0]
                air_time = entry.xpath('./div/span[1]/text()')[0]
                provider = entry.xpath(
                    './div/span[@class="txt_subinfo"][2]/text()')[0]
                images = entry.xpath('./a/img/@src')
                scheduled = entry.xpath(
                    './div/span[@class="txt_subinfo"]/span[@class="txt_subinfo"]/text()'
                )
                ratings = entry.xpath(
                    './div/span[@class="txt_subinfo"][2]/span[@class="f_red"]/text()'
                )
                programs.append({
                    'title': title,
                    'air_time': air_time,
                    'provider': provider,
                    'image': images[0] if len(images) else placeholder_image,
                    'scheduled': first_or_unchanged(scheduled),
                    'ratings': first_or_unchanged(ratings),
                })

            return programs
        except Exception as err:
            logger.error('Exception:%s', err)
            logger.error(traceback.format_exc())
示例#4
0
    def search_movie_web(movie_list, movie_name, movie_year):
        """Gather Daum movie candidates for a title/year and rank them by score.

        The lookup runs in three best-effort stages:

        1. Daum suggest API. Every suggestion is registered with a score of
           95 (exact title and year), 91 (year equal or within one), or
           85 minus 5 per list position otherwise, floored at 10.
        2. Daum search home page, via ``MovieSearch.get_movie_info_from_home``.
           The hit is registered with score 100 when its year equals
           ``movie_year`` and 90 otherwise; in the latter case the related
           links and the series list of that page are registered with
           score 80 as well.
        3. Detail JSON of the current top candidate, used to refresh its
           title/year/country and to fill its ``more`` dict.

        Stage 1 has its own error boundary. Stage 3 runs inside the error
        boundary of stage 2, so an exception in stage 2 skips it.

        Args:
            movie_list: Candidate list handed to ``MovieSearch.movie_append``
                for every hit (presumably extended in place -- that helper is
                not visible here, confirm).
            movie_name: Title to search for. It is UTF-8 encoded for the
                query string.
            movie_year: Release year. It is compared as-is against scraped
                year strings and through ``int()`` against numeric years.

        Returns:
            The candidates ordered by descending ``score``. The order comes
            from reversing a stable ascending sort, so among equal scores the
            later entries come first.
        """

        def home_entry(info, entry_score):
            # Candidate dict for a result scraped by get_movie_info_from_home.
            return {
                'id': info['daum_id'],
                'title': info['title'],
                'year': info['year'],
                'score': entry_score,
                'country': info['country'],
                'more': info['more']
            }

        # ---- Stage 1: suggest API -------------------------------------
        try:
            url = 'https://suggest-bar.daum.net/suggest?id=movie&cate=movie&multiple=1&mod=json&code=utf_in_out&q=%s' % (
                urllib.quote(movie_name.encode('utf8')))
            from framework.common.daum import headers, session
            from system.logic_site import SystemLogicSite
            res = session.get(url,
                              headers=headers,
                              cookies=SystemLogicSite.get_daum_cookies())
            data = res.json()
            movie_cmp = re.sub('[\\/:*?"<>|]', '', movie_name)
            for index, item in enumerate(data['items']['movie']):
                # Each suggestion is a '|'-separated record; fields 0, 1 and
                # 3 are used as title, id and year.
                tmps = item.split('|')
                score = 85
                tmps[0] = re.sub('[\\/:*?"<>|]', '', tmps[0])
                if tmps[0] == movie_cmp and int(tmps[3]) == int(movie_year):
                    score = 95
                elif tmps[3] == movie_year or abs(
                        int(tmps[3]) - int(movie_year)) <= 1:
                    score = score + 6
                else:
                    # Trust lower-ranked suggestions less.
                    score -= index * 5

                score = max(score, 10)
                MovieSearch.movie_append(
                    movie_list, {
                        'id': tmps[1],
                        'title': tmps[0],
                        'year': tmps[3],
                        'score': score
                    })

        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

        # ---- Stage 2: search home page --------------------------------
        try:
            url = 'https://search.daum.net/search?nil_suggest=btn&w=tot&DA=SBC&q=%s%s' % (
                '%EC%98%81%ED%99%94+', urllib.quote(movie_name.encode('utf8')))
            ret = MovieSearch.get_movie_info_from_home(url)
            if ret is not None:
                if ret['year'] == movie_year:
                    score = 100
                    need_another_search = False
                else:
                    # This used to be the no-op comparison `score >= 90`,
                    # which left `score` stale from stage 1 or undefined
                    # (NameError) when stage 1 produced nothing.
                    score = 90
                    need_another_search = True
                MovieSearch.movie_append(movie_list, home_entry(ret, score))
                logger.debug('need_another_search : %s' % need_another_search)
                movie = ret['movie']
                if need_another_search:
                    # Related links are labelled "<title>(<year>)".
                    link_pattern = re.compile('(.*?)\\((.*?)\\)')
                    tmp = movie.find('div[@class="coll_etc"]')
                    if tmp is not None:
                        first_url = None
                        for tag in tmp.findall('.//a'):
                            match = link_pattern.search(tag.text_content())
                            if not match:
                                continue
                            daum_id = tag.attrib['href'].split('||')[1]
                            score = 80
                            if match.group(1) == movie_name and match.group(
                                    2) == movie_year:
                                first_url = 'https://search.daum.net/search?%s' % tag.attrib[
                                    'href']
                            # NOTE(review): `first_url is not None` means a
                            # year-only match can only replace an earlier
                            # exact match; `is None` may have been intended.
                            # Kept as-is pending confirmation.
                            elif match.group(
                                    2) == movie_year and first_url is not None:
                                first_url = 'https://search.daum.net/search?%s' % tag.attrib[
                                    'href']
                            MovieSearch.movie_append(
                                movie_list, {
                                    'id': daum_id,
                                    'title': match.group(1),
                                    'year': match.group(2),
                                    'score': score
                                })

                        logger.debug('first_url : %s' % first_url)
                        if first_url is not None:
                            new_ret = MovieSearch.get_movie_info_from_home(
                                first_url)
                            # The helper returns None on any failure; skip
                            # instead of raising on the subscript.
                            if new_ret is not None:
                                MovieSearch.movie_append(
                                    movie_list, home_entry(new_ret, 100))
                    tmp = movie.find('.//ul[@class="list_thumb list_few"]')
                    logger.debug('SERIES:%s' % tmp)
                    if tmp is not None:
                        first_url = None
                        score = 80
                        for tag in tmp.findall('.//div[@class="wrap_cont"]'):
                            a_tag = tag.find('a')
                            daum_id = a_tag.attrib['href'].split('||')[1]
                            daum_name = a_tag.text_content()
                            span_tag = tag.find('span')
                            year = span_tag.text_content()
                            logger.debug('daum_id:%s %s %s' %
                                         (daum_id, year, daum_name))
                            if daum_name == movie_name and year == movie_year:
                                first_url = 'https://search.daum.net/search?%s' % a_tag.attrib[
                                    'href']
                            # NOTE(review): same questionable
                            # `first_url is not None` condition as in the
                            # related-links loop; kept as-is.
                            elif year == movie_year and first_url is not None:
                                # The link lives on the <a>, not on the
                                # wrapping <div> (`tag`) that was read before.
                                first_url = 'https://search.daum.net/search?%s' % a_tag.attrib[
                                    'href']
                            MovieSearch.movie_append(
                                movie_list, {
                                    'id': daum_id,
                                    'title': daum_name,
                                    'year': year,
                                    'score': score
                                })
                            logger.debug('first_url : %s' % first_url)

                        if first_url is not None:
                            new_ret = MovieSearch.get_movie_info_from_home(
                                first_url)
                            if new_ret is not None:
                                MovieSearch.movie_append(
                                    movie_list, home_entry(new_ret, 100))

            # ---- Stage 3: detail JSON for the top candidate -----------
            try:
                movie_list = list(
                    reversed(sorted(movie_list, key=lambda k: k['score'])))
                logger.debug('smw - id: %s, score:%s, myear:%s, year:%s',
                             movie_list[0]['id'], movie_list[0]['score'],
                             movie_year, movie_list[0]['year'])
                id_url = 'http://movie.daum.net/data/movie/movie_info/detail.json?movieId=%s' % movie_list[
                    0]['id']
                # Re-imported here because the stage 1 import sits inside its
                # own try block and may not have succeeded.
                from framework.common.daum import headers, session
                from system.logic_site import SystemLogicSite
                res = session.get(id_url,
                                  headers=headers,
                                  cookies=SystemLogicSite.get_daum_cookies())
                meta_data = res.json()
                logger.debug('smw - more search')
                if meta_data is not None:
                    logger.debug('smw - more search....ing')
                    info = meta_data['data']
                    best = movie_list[0]
                    if int(best['year']) == 0:
                        best['year'] = unicode(info['prodYear'])
                    elif int(movie_year) == int(info['prodYear']):
                        best['year'] = unicode(info['prodYear'])
                        best['score'] = best['score'] + 5
                    best['title'] = info['titleKo']
                    logger.debug('smw - eng title:%s', info['titleEn'])
                    best.update({
                        'more': {
                            'eng_title': "",
                            'rate': "",
                            'during': "",
                            'genre': []
                        }
                    })
                    best['more']['during'] = unicode(info['showtime'])
                    if info['admissionDesc']:
                        best['more']['rate'] = info['admissionDesc']
                        logger.debug('smw - rate:%s', best['more']['rate'])
                    best['more']['eng_title'] = info['titleEn']
                    # Only the first listed country is kept.
                    for item in info['countries']:
                        best['country'] = item['countryKo']
                        break
                    for item in info['genres']:
                        best['more']['genre'].append(item['genreName'])
                        logger.debug('%s', item['genreName'])

            except Exception as e:
                # Enrichment is optional (e.g. no candidates at all), so this
                # stays non-fatal -- but leave a trace instead of hiding it.
                logger.debug('smw - detail lookup skipped: %s', e)
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

        movie_list = list(
            reversed(sorted(movie_list, key=lambda k: k['score'])))
        return movie_list
示例#5
0
    def get_movie_info_from_home(url):
        """Scrape the movie box (``movieEColl``) of a Daum search result page.

        Args:
            url: Daum search URL, fetched through the shared Daum session.

        Returns:
            ``None`` when the page has no ``movieEColl`` element or when
            anything raises (the exception and traceback are logged).
            Otherwise a dict with:

            movie   -- the ``movieEColl`` lxml element itself
            title   -- text of the ``<b>`` inside the title link
            daum_id -- the part of the title link's href after the first '='
            year    -- four-digit year found in the title line, else ''
            country -- first '|'-separated field of the info row, else ''
            more    -- extra fields: ``poster``, ``title``, ``info`` (a
                       one-element list holding the raw info row), plus
                       ``eng_title`` when a year was found and
                       ``country``/``genre``/``date``/``rate``/``during``
                       when the info row has 3 to 5 fields.
        """
        try:
            from framework.common.daum import headers, session
            from system.logic_site import SystemLogicSite
            res = session.get(url,
                              headers=headers,
                              cookies=SystemLogicSite.get_daum_cookies())
            html = lxml.html.document_fromstring(res.content)
            try:
                movie = html.get_element_by_id('movieEColl')
            except Exception:
                # No movie box on this page; reported just below.
                movie = None

            if movie is None:
                logger.debug('gmifh - movie is none')
                return None

            title_tag = movie.get_element_by_id('movieTitle')
            a_tag = title_tag.find('a')
            href = a_tag.attrib['href']
            title = a_tag.find('b').text_content()
            title_line = title_tag.text_content()

            tmp_year = ''
            more = {}
            # Four digits, whitespace, then the two-character Korean marker.
            match = re.compile(u'(?P<year>\\d{4})\\s\uc81c\uc791').search(
                title_line)
            if match:
                tmp_year = match.group('year')
                # Whatever is left of the title line after removing the
                # Korean title, the year, the marker and commas.
                more['eng_title'] = title_line.replace(title, '').replace(
                    tmp_year, '').replace(u'\uc81c\uc791',
                                          '').replace(u',', '').strip()

            # NOTE: the xpath expressions below start with '//', so they are
            # evaluated against the whole document, not just `movie`.
            country_tag = movie.xpath('//div[3]/div/div[1]/div[2]/dl[1]/dd[2]')
            country = ''
            info_text = ''
            if country_tag:
                raw_info = country_tag[0].text_content()
                country = raw_info.split('|')[0].strip()
                info_text = raw_info.strip()
                logger.debug(country)
            more['poster'] = movie.xpath(
                '//*[@id="nmovie_img_0"]/a/img')[0].attrib['src']
            more['title'] = movie.xpath(
                '//*[@id="movieTitle"]/span')[0].text_content()
            # A missing info row now yields [''] instead of an IndexError on
            # country_tag[0], which used to discard the whole result although
            # the emptiness check above explicitly tolerates that case.
            more['info'] = [info_text]
            logger.debug(more['info'][0])

            # Info row layout: country | genre | [date |] [rate |] duration.
            fields = more['info'][0].split('|')
            count = len(fields)
            if 3 <= count <= 5:
                more['country'] = fields[0].replace(u'\uc678', '').strip()
                more['genre'] = fields[1].replace(u'\uc678', '').strip()
                if count == 5:
                    more['date'] = fields[2].replace(u'\uac1c\ubd09',
                                                     '').strip()
                else:
                    more['date'] = ''
                more['rate'] = fields[-2].strip() if count >= 4 else ''
                more['during'] = fields[-1].strip()

            daum_id = href.split('=')[1]
            return {
                'movie': movie,
                'title': title,
                'daum_id': daum_id,
                'year': tmp_year,
                'country': country,
                'more': more
            }
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

        return None