Пример #1
0
 def delete(self, content_type, data_hash, file_name):
     """Delete a cached blob and, for image types, its thumbnail entry.

     The cache key is rebuilt from the normalized URL segments as
     ``content_type/data_hash/file_name``. The response body is the
     ``self.json_stringify`` serialization of a dict that maps every key
     passed to ``memcache.delete`` to the value that call returned.
     """
     content_type = self.normalize(content_type)
     file_name = self.normalize(file_name)
     key = content_type + '/' + data_hash + '/' + file_name
     result = {key: memcache.delete(key)}
     content_type = py_urllib.unquote(content_type)
     if IMAGE_TYPES.match(content_type):
         thumbnail_key = key + THUMB_SUFFIX
         result[thumbnail_key] = memcache.delete(thumbnail_key)
     # The Accept header is optional: default to '' so that a request
     # without it does not raise TypeError on the membership test.
     if 'application/json' in self.request.headers.get('Accept', ''):
         self.response.headers['Content-Type'] = 'application/json'
     self.response.write(self.json_stringify(result))
Пример #2
0
 def get(self, content_type, data_hash, file_name):
     """Serve a cached blob, or answer 404 when its key is not in memcache.

     Content types matching ``IMAGE_TYPES`` are served as-is (``image/png``
     when the file name ends with ``THUMB_SUFFIX``); every other type is
     sent as ``application/octet-stream``.
     """
     normalized_type = self.normalize(content_type)
     normalized_name = self.normalize(file_name)
     cache_key = '/'.join((normalized_type, data_hash, normalized_name))
     payload = memcache.get(cache_key)
     if payload is None:
         return self.error(404)

     headers = self.response.headers
     # Stop browsers from MIME-sniffing a different content type.
     headers['X-Content-Type-Options'] = 'nosniff'

     mime_type = py_urllib.unquote(normalized_type)
     if IMAGE_TYPES.match(mime_type):
         if normalized_name.endswith(THUMB_SUFFIX):
             mime_type = 'image/png'
     else:
         # Non-image types are forced into a download dialog.
         mime_type = 'application/octet-stream'
     headers['Content-Type'] = mime_type

     # Let clients cache the response for the expiration time.
     headers['Cache-Control'] = 'public,max-age=%d' % EXPIRATION_TIME
     self.response.write(payload)
Пример #3
0
    def generate():
        """Yield the stdout of an ffmpeg run over the requested source.

        The input is the URL-decoded ``source`` query argument; the ffmpeg
        arguments that follow the input come from the ``trans_option``
        setting, split on single spaces. Nothing is yielded during the first
        second; after that the queued chunks are released while the newest
        ones stay queued.

        The ffmpeg process is always stopped and reaped when the generator
        finishes or is closed by its consumer.
        """
        from collections import deque

        option = ModelSetting.get('trans_option').strip().split(' ')
        source = request.args.get('source')
        source = py_urllib.unquote(source)
        logger.debug(source)

        start_time = time.time()
        chunks = deque()
        sent_burst = False

        # NOTE(review): `source` comes straight from the query string and is
        # handed to ffmpeg as its input; it is passed as a list element (no
        # shell), but ffmpeg will still open whatever it names.
        ffmpeg_command = ['ffmpeg', "-loglevel", "quiet", "-i", source
                          ] + option

        logger.debug('command : %s', ffmpeg_command)
        process = subprocess.Popen(ffmpeg_command,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   bufsize=-1)
        try:
            while True:
                chunks.append(process.stdout.read(1024))
                warmed_up = time.time() > start_time + 1
                if not sent_burst and warmed_up and chunks:
                    sent_burst = True
                    # Flush everything gathered so far except the last two
                    # chunks.
                    for _ in range(len(chunks) - 2):
                        yield chunks.popleft()
                elif warmed_up and chunks:
                    yield chunks.popleft()
                if process.poll() is not None:
                    if process.returncode > 0:
                        logger.debug('FFmpeg Error :%s', process.returncode)
                    break
        finally:
            # Reached on normal completion and when the consumer closes the
            # generator early (e.g. the client disconnects): never leave an
            # orphaned ffmpeg process or an open pipe behind.
            if process.poll() is None:
                process.kill()
            process.stdout.close()
            process.wait()
Пример #4
0
 def handle_upload(self):
     """Store each uploaded file from the POST body and report the outcome.

     Returns a list with one dict per processed field. Each dict carries
     ``name``, ``type`` and ``size``; when validation passes it also gets
     either ``url``/``deleteUrl``/``deleteType`` (plus ``thumbnailUrl`` when
     a thumbnail key was produced) or ``error`` when no key was returned.
     """
     uploads = []
     for _field_name, storage in self.request.POST.items():
         # Skip values that are plain unicode strings (presumably ordinary
         # form fields rather than file uploads).
         if type(storage) is unicode:
             continue
         info = {
             'name': py_urllib.unquote(storage.filename),
             'type': storage.type,
             'size': self.get_file_size(storage.file),
         }
         if self.validate(info):
             key, thumbnail_key = self.write_blob(storage.value, info)
             if key is None:
                 info['error'] = 'Failed to store uploaded file.'
             else:
                 file_url = self.request.host_url + '/' + key
                 info['url'] = file_url
                 info['deleteUrl'] = file_url
                 info['deleteType'] = 'DELETE'
                 if thumbnail_key is not None:
                     info['thumbnailUrl'] = (
                         self.request.host_url + '/' + thumbnail_key)
         uploads.append(info)
     return uploads
Пример #5
0
    def __get_download_list(html, tree, site_instance, item):
        """Collect the download entries found in a detail page.

        Args:
            html: Page text that ``site_instance.info['DOWNLOAD_REGEX']`` is
                matched against. The pattern must define the named groups
                ``url`` and ``filename``.
            tree: Not used by this function.
            site_instance: Object whose ``info`` dict supplies the site
                configuration read here (``DOWNLOAD_REGEX``,
                ``DOWNLOAD_URL_SUB``, ``FILENAME_SUB``, ``TORRENT_SITE_URL``,
                ``EXTRA``).
            item: Dict describing the post; ``magnet``, ``title`` and ``url``
                are read.

        Returns:
            A list of dicts with ``link`` and ``filename`` (and ``direct_url``
            when a subtitle file was uploaded to the Discord webhook). Links
            are de-duplicated. If an exception is raised, it is logged and
            whatever was collected so far is returned.
        """
        download_list = []
        try:
            # Without a download pattern there is nothing to extract.
            if 'DOWNLOAD_REGEX' not in site_instance.info:
                return download_list
            tmp = re.compile(site_instance.info['DOWNLOAD_REGEX'],
                             re.MULTILINE).finditer(html)
            for t in tmp:
                # Matches with a blank file name are ignored.
                if t.group('filename').strip() == '':
                    continue
                entity = {}
                # URL-decode, then unescape, both the link and the file name.
                entity['link'] = py_urllib.unquote(
                    t.group('url').strip()).strip()
                entity['link'] = unescape(entity['link'])
                logger.debug(entity['link'])
                entity['filename'] = py_urllib.unquote(
                    t.group('filename').strip())
                entity['filename'] = unescape(entity['filename'])
                # Optional site-specific rewrite of the link; the replacement
                # template may reference the site URL as {URL}.
                if 'DOWNLOAD_URL_SUB' in site_instance.info:
                    logger.debug(entity['link'])
                    entity['link'] = re.sub(
                        site_instance.info['DOWNLOAD_URL_SUB'][0],
                        site_instance.info['DOWNLOAD_URL_SUB'][1].format(
                            URL=site_instance.info['TORRENT_SITE_URL']),
                        entity['link']).strip()
                # Turn relative links into absolute ones using the site URL.
                if not entity['link'].startswith('http'):
                    form = '%s%s' if entity['link'].startswith(
                        '/') else '%s/%s'
                    entity['link'] = form % (
                        site_instance.info['TORRENT_SITE_URL'], entity['link'])
                # Optional site-specific rewrite of the file name.
                if 'FILENAME_SUB' in site_instance.info:
                    entity['filename'] = re.sub(
                        site_instance.info['FILENAME_SUB'][0],
                        site_instance.info['FILENAME_SUB'][1],
                        entity['filename']).strip()
                # Skip links that were already collected.
                exist = False
                for tt in download_list:
                    if tt['link'] == entity['link']:
                        exist = True
                        break
                if not exist:
                    # On the SJVA server, for posts that have magnets,
                    # subtitle files are fetched and re-hosted through a
                    # Discord webhook. Failures here are logged and do not
                    # prevent the entry from being added.
                    if app.config['config']['is_sjva_server'] and len(
                            item['magnet']) > 0:
                        try:
                            ext = os.path.splitext(
                                entity['filename'])[1].lower()
                            if ext in ['.smi', '.srt', '.ass']:
                                import io
                                if 'USE_SELENIUM' in site_instance.info[
                                        'EXTRA']:
                                    from system import SystemLogicSelenium
                                    driver = SystemLogicSelenium.get_driver()
                                    driver.get(entity['link'])
                                    import time
                                    # Fixed 10 second pause before listing the
                                    # downloaded files.
                                    time.sleep(10)
                                    files = SystemLogicSelenium.get_downloaded_files(
                                    )
                                    logger.debug(files)
                                    # Check the files: pick the first one whose
                                    # name contains the expected base name;
                                    # index 0 is used when none matches.
                                    filename_no_ext = os.path.splitext(
                                        entity['filename'].split('/')[-1])
                                    file_index = 0
                                    for idx, value in enumerate(files):
                                        if value.find(
                                                filename_no_ext[0]) != -1:
                                            file_index = idx
                                            break
                                    logger.debug('fileindex : %s', file_index)
                                    content = SystemLogicSelenium.get_file_content(
                                        files[file_index])

                                    byteio = io.BytesIO()
                                    byteio.write(content)
                                else:
                                    # Plain HTTP download, read in 1024-byte
                                    # chunks into memory.
                                    data = LogicFromSite.get_html(
                                        entity['link'],
                                        referer=item['url'],
                                        stream=True)
                                    byteio = io.BytesIO()
                                    for chunk in data.iter_content(1024):
                                        byteio.write(chunk)
                                from discord_webhook import DiscordWebhook, DiscordEmbed
                                webhook_url = app.config['config'][
                                    'rss_subtitle_webhook']
                                text = '%s\n<%s>' % (item['title'],
                                                     item['url'])
                                webhook = DiscordWebhook(url=webhook_url,
                                                         content=text)
                                webhook.add_file(file=byteio.getvalue(),
                                                 filename=entity['filename'])
                                response = webhook.execute()
                                discord = response.json()
                                logger.debug(discord)
                                # Keep the URL of the first attachment in the
                                # webhook response as the direct link.
                                if 'attachments' in discord:
                                    entity['direct_url'] = discord[
                                        'attachments'][0]['url']
                        except Exception as e:
                            logger.debug('Exception:%s', e)
                            logger.debug(traceback.format_exc())
                    download_list.append(entity)
            return download_list

        except Exception as e:
            logger.debug('Exception:%s', e)
            logger.debug(traceback.format_exc())
        return download_list
Пример #6
0
    def __get_bbs_list(site_instance,
                       board,
                       max_page,
                       max_id,
                       xpath_dict,
                       is_test=False):
        """Scrape a board's list pages and return the posts to process.

        Args:
            site_instance: Object whose ``info`` dict supplies the site
                configuration read here (``EXTRA``, ``COOKIE``,
                ``DETAIL_URL_SUB``, ``TORRENT_SITE_URL``, ``ID_REGEX``,
                ``NAME``).
            board: Board identifier passed to
                ``LogicFromSite.get_board_url`` and ``ModelBbs2.get``.
            max_page: Number of list pages to visit, starting at page 1.
                Forced to 1 when ``FORCE_FIRST_PAGE`` is in ``EXTRA``.
            max_id: Posts whose integer id is less than or equal to this
                value are not returned (see below).
            xpath_dict: Dict with the row ``XPATH`` template (formatted with
                the row index) and the optional keys ``INDEX_STEP``,
                ``INDEX_START``, ``TITLE_XPATH``, ``TITLE_SUB`` and
                ``TITLE_REGEX``.
            is_test: When true, every post with an id is returned without
                any filtering.

        Returns:
            A list of dicts with the keys ``title``, ``url`` and ``id``.

        Filtering when ``is_test`` is false:
            * ``USING_BOARD_CHAR_ID`` in ``EXTRA``: a post is kept only if
              ``ModelBbs2.get`` returns ``None`` for it.
            * ``NO_BREAK_BY_MAX_ID`` in ``EXTRA``: posts with
              ``id <= max_id`` are skipped, scanning continues.
            * otherwise: the first post with ``id <= max_id`` stops the
              scan of this page and of all remaining pages.

        Exceptions raised while handling a single row are logged and the
        row is skipped.
        """
        bbs_list = []
        index_step = xpath_dict[
            'INDEX_STEP'] if 'INDEX_STEP' in xpath_dict else 1
        index_start = xpath_dict[
            'INDEX_START'] if 'INDEX_START' in xpath_dict else 1
        stop_by_maxid = False
        if 'FORCE_FIRST_PAGE' in site_instance.info['EXTRA']:
            max_page = 1
        cookie = None
        if 'COOKIE' in site_instance.info:
            cookie = site_instance.info['COOKIE']

        for p in range(max_page):
            url = LogicFromSite.get_board_url(site_instance, board, str(p + 1))
            # The list container is the part of the row template that
            # precedes the '[%s]' index placeholder.
            # NOTE(review): if '[%s]' is absent, find() returns -1 and this
            # slice silently drops the last character - confirm every
            # configured XPATH contains the placeholder.
            list_tag = xpath_dict['XPATH'][:xpath_dict['XPATH'].find('[%s]')]
            logger.debug('list_tag : %s', list_tag)

            logger.debug('Url : %s', url)
            if 'USE_SELENIUM' in site_instance.info['EXTRA']:
                from system import SystemLogicSelenium
                tmp = SystemLogicSelenium.get_pagesoruce_by_selenium(
                    url, list_tag)
            else:
                tmp = LogicFromSite.get_html(url, cookie=cookie)
            tree = html.fromstring(tmp)

            lists = tree.xpath(list_tag)

            logger.debug('Count : %s', len(lists))

            for i in range(index_start, len(lists) + 1, index_step):
                try:
                    a_tag = tree.xpath(xpath_dict['XPATH'] % i)
                    # When the template matches several nodes, the last one
                    # is used; rows without a match are skipped.
                    a_tag_index = len(a_tag) - 1

                    if a_tag_index == -1:
                        logger.debug('a_tag_index : %s', a_tag_index)
                        continue
                    item = {}
                    # Title: from TITLE_XPATH when configured (last text node
                    # for a 'text()' expression, otherwise the text content
                    # of the first match), else from the link itself.
                    if 'TITLE_XPATH' in xpath_dict:

                        if xpath_dict['TITLE_XPATH'].endswith('text()'):
                            logger.debug(a_tag[a_tag_index].xpath(
                                xpath_dict['TITLE_XPATH']))

                            item['title'] = py_urllib.unquote(
                                a_tag[a_tag_index].xpath(
                                    xpath_dict['TITLE_XPATH'])[-1]).strip()
                        else:
                            item['title'] = py_urllib.unquote(
                                a_tag[a_tag_index].xpath(
                                    xpath_dict['TITLE_XPATH'])
                                [0].text_content()).strip()
                    else:
                        item['title'] = py_urllib.unquote(
                            a_tag[a_tag_index].text_content()).strip()

                    if 'TITLE_SUB' in xpath_dict:
                        item['title'] = re.sub(xpath_dict['TITLE_SUB'][0],
                                               xpath_dict['TITLE_SUB'][1],
                                               item['title']).strip()

                    # After the regular title handling, extract the title
                    # with the regex if one is configured.
                    if 'TITLE_REGEX' in xpath_dict:
                        match = re.compile(xpath_dict['TITLE_REGEX']).search(
                            item['title'])
                        if match:
                            item['title'] = match.group('title')

                    item['url'] = a_tag[a_tag_index].attrib['href']
                    # Optional site-specific rewrite of the detail URL; the
                    # replacement template may reference the site URL as {URL}.
                    if 'DETAIL_URL_SUB' in site_instance.info:
                        item['url'] = re.sub(
                            site_instance.info['DETAIL_URL_SUB'][0],
                            site_instance.info['DETAIL_URL_SUB'][1].format(
                                URL=site_instance.info['TORRENT_SITE_URL']),
                            item['url'])

                    # Turn relative URLs into absolute ones.
                    if not item['url'].startswith('http'):
                        form = '%s%s' if item['url'].startswith(
                            '/') else '%s/%s'
                        item['url'] = form % (
                            site_instance.info['TORRENT_SITE_URL'],
                            item['url'])

                    # Post id: the site's ID_REGEX if configured, otherwise a
                    # set of default URL patterns. Tried against the full URL
                    # first, then against the URL without its query string.
                    item['id'] = ''
                    if 'ID_REGEX' in site_instance.info:
                        id_regexs = [site_instance.info['ID_REGEX']]
                    else:
                        id_regexs = [
                            r'wr_id\=(?P<id>\d+)', r'\/(?P<id>\d+)\.html',
                            r'\/(?P<id>\d+)$'
                        ]
                    for regex in id_regexs:
                        match = re.compile(regex).search(item['url'])
                        if match:
                            item['id'] = match.group('id')
                            break
                    if item['id'] == '':
                        for regex in id_regexs:
                            match = re.compile(regex).search(
                                item['url'].split('?')[0])
                            if match:
                                item['id'] = match.group('id')
                                break

                    logger.debug('ID : %s, TITLE : %s', item['id'],
                                 item['title'])
                    # Rows without an id cannot be tracked and are dropped.
                    if item['id'].strip() == '':
                        continue
                    if is_test:
                        bbs_list.append(item)
                    else:
                        if 'USING_BOARD_CHAR_ID' in site_instance.info[
                                'EXTRA']:
                            # javdb: ids are looked up in the database
                            # instead of being compared with max_id.
                            from .model import ModelBbs2
                            entity = ModelBbs2.get(
                                site=site_instance.info['NAME'],
                                board=board,
                                board_char_id=item['id'])
                            if entity is None:
                                bbs_list.append(item)
                                logger.debug('> Append..')
                            else:
                                logger.debug('> exist..')
                        else:
                            # 2019-04-04 TorrentPong
                            try:
                                if 'NO_BREAK_BY_MAX_ID' in site_instance.info[
                                        'EXTRA']:
                                    if int(item['id']) <= max_id:
                                        continue
                                    else:
                                        bbs_list.append(item)
                                else:
                                    if int(item['id']) <= max_id:
                                        logger.debug('STOP by MAX_ID(%s)',
                                                     max_id)
                                        stop_by_maxid = True
                                        break
                                    bbs_list.append(item)
                            except Exception as e:
                                logger.error('Exception:%s', e)
                                logger.error(traceback.format_exc())
                except Exception as e:
                    logger.error('Exception:%s', e)
                    logger.error(traceback.format_exc())
                    logger.error(site_instance.info)
            # Reaching max_id on one page also ends the paging.
            if stop_by_maxid:
                break
        logger.debug('Last count :%s', len(bbs_list))
        return bbs_list
Пример #7
0
    def get_show_info_on_home(root):
        """Extract TV show details from a parsed Daum search result page.

        Args:
            root: Parsed document exposing ``xpath()`` (an lxml element is
                expected).

        Returns:
            ``None`` when the title link is not found exactly once, or when
            any exception is raised while parsing (the exception is logged).
            Otherwise a dict with the keys ``title``, ``id``, ``status``
            (0 by default, 1 for an ended show, 2 for an upcoming show),
            ``extra_info``, ``studio``, ``extra_info_array``,
            ``broadcast_info``, ``broadcast_term``, ``year``, ``series`` and
            ``equal_name``.
        """
        try:
            id_pattern = re.compile(r'irk\=(?P<id>\d+)')
            year_pattern = re.compile(r'(?P<year>\d{4})')

            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/a')
            if len(tags) != 1:
                return
            entity = {}
            entity['title'] = tags[0].text
            # Prefer the title carried in the link's query string, if any.
            match = re.compile(r'q\=(?P<title>.*?)&').search(
                tags[0].attrib['href'])
            if match:
                entity['title'] = py_urllib.unquote(match.group('title'))
            entity['id'] = id_pattern.search(
                tags[0].attrib['href']).group('id')

            entity['status'] = 0
            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/span')
            if len(tags) == 1:
                if tags[0].text == u'방송종료':
                    entity['status'] = 1
                elif tags[0].text == u'방송예정':
                    entity['status'] = 2

            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div')
            entity['extra_info'] = tags[0].text_content().strip()

            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/a')
            entity['studio'] = tags[0].text

            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/span')
            entity['extra_info_array'] = [tag.text for tag in tags]

            entity['broadcast_info'] = entity['extra_info_array'][-2].strip()
            entity['broadcast_term'] = entity['extra_info_array'][-1].split(
                ',')[-1].strip()

            entity['year'] = year_pattern.search(
                entity['extra_info_array'][-1]).group('year')

            # Series: always contains the show itself.
            entity['series'] = []
            entity['series'].append({
                'title': entity['title'],
                'id': entity['id'],
                'year': entity['year']
            })
            tags = root.xpath('//*[@id="tv_series"]/div/ul/li')

            if tags:
                # 2019-03-05: when a "more series" link exists, read the
                # series list from the page it points to instead.
                try:
                    more = root.xpath('//*[@id="tv_series"]/div/div/a')
                    if more:
                        url = more[0].attrib['href']
                        if not url.startswith('http'):
                            url = 'https://search.daum.net/search%s' % url
                        logger.debug('MORE URL : %s', url)
                        if more[0].xpath('span')[0].text == u'시리즈 더보기':
                            more_root = Logic.get_lxml_by_url(url)
                            tags = more_root.xpath('//*[@id="series"]/ul/li')
                except Exception as exception:
                    logger.error('Exception:%s', exception)
                    logger.error(traceback.format_exc())

                for tag in tags:
                    series_item = {}
                    series_item['title'] = tag.xpath('a')[0].text
                    series_item['id'] = id_pattern.search(
                        tag.xpath('a')[0].attrib['href']).group('id')
                    if tag.xpath('span'):
                        series_item['date'] = tag.xpath('span')[0].text
                        series_item['year'] = year_pattern.search(
                            series_item['date']).group('year')
                    else:
                        series_item['year'] = None
                    entity['series'].append(series_item)
                entity['series'] = sorted(entity['series'],
                                          key=lambda k: int(k['id']))

            # Programs sharing the same name.
            entity['equal_name'] = []
            tags = root.xpath(
                u'//div[@id="tv_program"]//dt[contains(text(),"동명 콘텐츠")]//following-sibling::dd'
            )
            if tags:
                # An <a> starts a new entry; the <span> elements after it
                # add details to that entry and decide whether it is kept.
                dic = None
                for tag in tags[0].xpath('*'):
                    if tag.tag == 'a':
                        dic = {}
                        dic['title'] = tag.text
                        dic['id'] = id_pattern.search(
                            tag.attrib['href']).group('id')
                    elif tag.tag == 'span':
                        # A <span> that appears before any <a> has no entry
                        # to describe. Skip it rather than failing the whole
                        # parse or altering a dict left over from the
                        # series loop.
                        if dic is None:
                            continue
                        # tag.text is None for an empty element.
                        text = tag.text or ''
                        match = re.compile(
                            r'\((?P<studio>.*?),\s*(?P<year>\d{4})?\)').search(
                                text)
                        if match:
                            dic['studio'] = match.group('studio')
                            dic['year'] = match.group('year')
                        elif text == u'(동명프로그램)':
                            entity['equal_name'].append(dic)
                        elif text == u'(동명회차)':
                            continue
            return entity
        except Exception as exception:
            logger.error('Exception:%s', exception)
            logger.error(traceback.format_exc())
Пример #8
0
def api(sub):
    """Serve the klive API endpoint selected by ``sub``.

    Supported values of ``sub``:

    * ``url.m3u8`` - resolve a channel with ``LogicKlive.get_url``. In
      ``plex`` mode the stream is piped through ffmpeg and returned as
      ``video/MP2T``; otherwise the result is a redirect or the returned
      data, depending on the action/mode.
    * ``m3uall``, ``m3u``, ``m3utvh``, ``sinaplayer`` - playlists built by
      ``LogicKlive`` (``m3u`` can be sent as a file with ``file=true``).
    * ``redirect`` - fetch the URL in the ``url`` query argument, optionally
      through ``proxy``, and stream the response back.
    * ``url.mpd`` - play info as JSON.
    * ``url.strm`` - play info as a downloadable ``.strm`` file.

    Query arguments ``m``, ``s``, ``i`` and ``q`` carry the mode, source,
    source id and quality. Exceptions raised inside the ``try`` blocks are
    logged, after which the function falls through and returns ``None``;
    the same happens for an unrecognized ``sub``.
    """
    if sub == 'url.m3u8':
        try:
            mode = request.args.get('m')
            source = request.args.get('s')
            source_id = request.args.get('i')
            quality = request.args.get('q')
            logger.debug('m:%s, s:%s, i:%s', mode, source, source_id)
            action, ret = LogicKlive.get_url(source, source_id, quality, mode)

            if mode == 'plex':
                # ffmpeg reads the stream back from this server's own
                # url.m3u8 endpoint in 'url' mode.
                new_url = '%s/klive/api/url.m3u8?m=url&s=%s&i=%s&q=%s' % (SystemModelSetting.get('ddns'), source, source_id, quality)
                if SystemModelSetting.get_bool('auth_use_apikey'):
                    new_url += '&apikey=%s' % SystemModelSetting.get('auth_apikey')

                def generate():
                    """Yield mpegts chunks produced by ffmpeg from ``new_url``.

                    Nothing is yielded during the first second; after that
                    the queued chunks are released. The ffmpeg process is
                    always stopped and reaped when the generator finishes
                    or is closed by its consumer.
                    """
                    from collections import deque

                    start_time = time.time()
                    chunks = deque()
                    sent_burst = False

                    if platform.system() == 'Windows':
                        path_ffmpeg = os.path.join(path_app_root, 'bin', platform.system(), 'ffmpeg.exe')
                    else:
                        path_ffmpeg = 'ffmpeg'

                    # 2020-12-17: copy the video, re-encode audio to AAC
                    # 128k, and keep ffmpeg quiet so that stderr (merged
                    # into stdout below) does not pollute the stream.
                    ffmpeg_command = [path_ffmpeg, "-loglevel", "quiet", "-i", new_url, "-c:v", "copy", "-c:a", "aac", "-b:a", "128k", "-f", "mpegts", "-tune", "zerolatency", "pipe:stdout"]

                    process = subprocess.Popen(ffmpeg_command, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = -1)
                    try:
                        process_list.append(process)
                        while True:
                            chunks.append(process.stdout.read(1024))
                            warmed_up = time.time() > start_time + 1
                            if not sent_burst and warmed_up and chunks:
                                sent_burst = True
                                # Flush everything gathered so far except
                                # the last two chunks.
                                for _ in range(len(chunks) - 2):
                                    yield chunks.popleft()
                            elif warmed_up and chunks:
                                yield chunks.popleft()
                            if process.poll() is not None:
                                if process.returncode > 0:
                                    logger.debug('FFmpeg Error :%s', process.returncode)
                                break
                    finally:
                        # Reached on normal completion and when the response
                        # iterator is closed early (client disconnect):
                        # never leave an orphaned ffmpeg or an open pipe.
                        if process.poll() is None:
                            process.kill()
                        process.stdout.close()
                        process.wait()
                return Response(stream_with_context(generate()), mimetype = "video/MP2T")

            if action == 'redirect':
                return redirect(ret, code=302)
            elif action == 'return_after_read':
                logger.warning('return_after_read')
                data = LogicKlive.get_return_data(source, source_id, ret, mode)
                return data, 200, {'Content-Type': 'application/vnd.apple.mpegurl'}
            elif action == 'return':
                return ret
            if ret is None: return
            if mode == 'url.m3u8':
                return redirect(ret, code=302)
            elif mode == 'lc':
                return ret
        except Exception as e: 
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())  
    elif sub == 'm3uall':
        return LogicKlive.get_m3uall()
    elif sub == 'm3u':
        data = LogicKlive.get_m3u(m3u_format=request.args.get('format'), group=request.args.get('group'), call=request.args.get('call'))
        if request.args.get('file') == 'true':
            import framework.common.util as CommonUtil
            basename = 'klive_custom.m3u'
            filename = os.path.join(path_data, 'tmp', basename)
            CommonUtil.write_file(data, filename)
            return send_file(filename, as_attachment=True, attachment_filename=basename)
        else:
            return data
    elif sub == 'm3utvh':
        return LogicKlive.get_m3u(for_tvh=True, m3u_format=request.args.get('format'), group=request.args.get('group'))
    elif sub == 'redirect':
        try:
            url = request.args.get('url')
            proxy = request.args.get('proxy')
            proxies = None
            if proxy is not None:
                proxy = py_urllib.unquote(proxy)
                proxies={"https": proxy, 'http':proxy}
            url = py_urllib.unquote(url)
            # NOTE(review): `url` and `proxy` are taken from the query
            # string and fetched by the server as-is. Confirm this endpoint
            # is only reachable by trusted clients.
            # 2021-06-03: stream the upstream body instead of reading it
            # into memory first.
            headers = {'Connection' : 'keep-alive'}
            r = requests.get(url, headers=headers, stream=True, proxies=proxies)
            rv = Response(r.iter_content(chunk_size=1024), r.status_code, content_type=r.headers['Content-Type'], direct_passthrough=True)
            return rv

        except Exception as e: 
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

    elif sub == 'url.mpd':
        try:
            mode = request.args.get('m')
            source = request.args.get('s')
            source_id = request.args.get('i')
            quality = request.args.get('q')
            return_format = 'json'
            data = LogicKlive.get_play_info(source, source_id, quality, mode=mode, return_format=return_format)
            return jsonify(data)
        except Exception as e: 
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())
    elif sub == 'url.strm':
        try:
            mode = request.args.get('m')
            source = request.args.get('s')
            source_id = request.args.get('i')
            quality = request.args.get('q')
            return_format = 'strm'
            data = LogicKlive.get_play_info(source, source_id, quality, mode=mode, return_format=return_format)

            import framework.common.util as CommonUtil
            from .model import ModelCustom
            # Name the file after the stored channel title when there is
            # one, otherwise after the source id.
            db_item = ModelCustom.get(source, source_id)
            if db_item is not None:
                basename = '%s.strm' % db_item.title
            else:
                basename = '%s.strm' % source_id
            filename = os.path.join(path_data, 'tmp', basename)
            CommonUtil.write_file(data, filename)
            return send_file(filename, as_attachment=True, attachment_filename=basename)
        except Exception as e: 
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())              
    elif sub == 'sinaplayer':
        data = LogicKlive.get_m3u_for_sinaplayer()
        return data
Пример #9
0
    def info_detail(cls, code, entity):
        """Add cast, directors, credits and studio from Naver's detail page.

        The page is requested with ``code`` minus its first two characters.
        Results are appended to ``entity.actor``, ``entity.director`` and
        ``entity.credits``, and ``entity.studio`` is set from the agency
        table. Any exception is logged and parsing stops at that point.
        """
        try:
            # e.g. https://movie.naver.com/movie/bi/mi/detail.nhn?code=182205
            detail_url = (
                'https://movie.naver.com/movie/bi/mi/detail.nhn?code=%s'
                % code[2:])
            page = html.fromstring(requests.get(detail_url).text)

            # Cast
            for person in page.xpath('//ul[@class="lst_people"]/li'):
                actor = EntityActor('', site=cls.site_name)
                image_src = person.xpath('.//img')[0].attrib['src']

                # The image URL is carried inside the src query parameter.
                thumb_match = re.search(r'src\=(?P<url>.*?)\&', image_src)
                if thumb_match:
                    actor.thumb = py_urllib.unquote(thumb_match.group('url'))

                actor.name = person.xpath(
                    './/div[@class="p_info"]/a')[0].attrib['title']
                original_name = person.xpath('.//div[@class="p_info"]/em')
                if original_name:
                    actor.originalname = original_name[0].text_content()
                role_nodes = person.xpath(
                    './/div[@class="p_info"]//p[@class="pe_cmt"]/span')
                if role_nodes:
                    role_text = role_nodes[0].text_content()
                    actor.role = role_text.replace(u'역', '').strip()
                entity.actor.append(actor)

            # Directors
            for director_box in page.xpath(
                    '//div[@class="director"]//div[@class="dir_obj"]'):
                links = director_box.xpath('.//div[@class="dir_product"]/a')
                if links:
                    entity.director.append(links[0].attrib['title'])

            # Credits: first row of the staff table; use the link text when
            # the span wraps a link, otherwise the span's own text.
            for span in page.xpath('//div[@class="staff"]//tr[1]//span'):
                links = span.xpath('.//a')
                credit = links[0].text_content() if links else span.text
                entity.credits.append(credit.strip())

            # Studio: the <dd> paired with the <dt> whose label matches.
            agency = page.xpath('//div[@class="agency"]/dl')
            if agency:
                labels = agency[0].xpath('.//dt')
                values = agency[0].xpath('.//dd')
                for idx, label in enumerate(labels):
                    if label.text_content().strip() != u'제작':
                        continue
                    links = values[idx].xpath('.//a')
                    studio_node = links[0] if links else values[idx]
                    entity.studio = studio_node.text_content().strip()

        except Exception as exception:
            logger.error('Exception:%s', exception)
            logger.error(traceback.format_exc())
Пример #10
0
    def get_show_info_on_home(cls, root):
        """Build a TV-show search entity from a parsed page tree.

        *root* is queried with XPath for the ``tvpColl``, ``tv_program`` and
        ``tv_series`` sections (presumably a Daum TV search result, judging
        by the element ids and the search.daum.net URL used below -- confirm
        against the caller).  An ``EntitySearchItemTvDaum`` is filled with
        title, code, status, image URL, studio, genre, episode number,
        broadcast info/term, year, description, the series list and the
        same-name entries.

        Returns ``entity.as_dict()``.  Returns ``None`` when no title link is
        found, or when an unexpected exception occurs (it is logged at debug
        level and swallowed).
        """
        try:
            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/a')
            # 2019-05-13
            # Some shows have two <a> tags in the title span; the last one is used.
            if len(tags) < 1:
                return
            tag_index = len(tags) - 1
            #entity = {}
            entity = EntitySearchItemTvDaum(cls.site_name)

            # Start from the link text, then prefer the "q=" query value of
            # the href when it can be extracted.
            entity.title = tags[tag_index].text
            match = re.compile(r'q\=(?P<title>.*?)&').search(
                tags[tag_index].attrib['href'])
            if match:
                entity.title = py_urllib.unquote(match.group('title'))
            # The code is module char + site char + the numeric "irk" id.
            entity.code = cls.module_char + cls.site_char + re.compile(
                r'irk\=(?P<id>\d+)').search(
                    tags[tag_index].attrib['href']).group('id')

            # Status label: "broadcast ended"/"completed" -> 2,
            # "scheduled to air" -> 0.
            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/span')
            if len(tags) == 1:
                if tags[0].text == u'방송종료' or tags[0].text == u'완결':
                    entity.status = 2
                elif tags[0].text == u'방송예정':
                    entity.status = 0

            #entity.image_url = 'https:' + root.xpath('//*[@id="tv_program"]/div[1]/div[1]/a/img')[0].attrib['src']
            # Best effort: any failure while locating the image leaves
            # image_url as None (the original note names one show as an example).
            try:
                entity.image_url = cls.process_image_url(
                    root.xpath('//*[@id="tv_program"]/div[1]/div[1]/a/img')
                    [0].attrib['src'])
            except:
                entity.image_url = None

            #logger.debug('get_show_info_on_home status: %s', entity.status)
            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div')
            entity.extra_info = SiteUtil.change_html(
                tags[0].text_content().strip())

            #logger.debug('get_show_info_on_home extra_info: %s', entity.extra_info)

            # Studio: a single <a> in the info div, otherwise a single first <span>.
            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/a')
            if len(tags) == 1:
                entity.studio = tags[0].text
            else:
                tags = root.xpath(
                    '//*[@id="tvpColl"]/div[2]/div/div[1]/div/span[1]')
                if len(tags) == 1:
                    entity.studio = tags[0].text
            #logger.debug('get_show_info_on_home studio: %s', entity.studio)

            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/span')
            extra_infos = [tag.text_content() for tag in tags]
            logger.debug(extra_infos)
            #tmps = extra_infos[1].strip().split(' ')
            # 2021-11-03
            # Chinese broadcasters have no <a> tag, so the broadcaster would
            # otherwise end up as the genre; when the second span is one of
            # the foreign-drama genres, swap genre and studio.
            # NOTE(review): extra_infos[1] raises IndexError when fewer than
            # two spans exist; the outer except then makes the call return None.
            entity.genre = extra_infos[0]
            if extra_infos[1] in [
                    '미국드라마', '중국드라마', '영국드라마', '일본드라마', '대만드라마', '기타국가드라마'
            ]:
                entity.genre = extra_infos[1]
                entity.studio = extra_infos[0]
            # Foreign-drama genres (US/Chinese/UK/Japanese/Taiwanese/other
            # countries) force status 1.
            if entity.genre in [
                    '미국드라마', '중국드라마', '영국드라마', '일본드라마', '대만드라마', '기타국가드라마'
            ]:
                entity.status = 1
            #logger.debug(tmps)
            #if len(tmps) == 2:
            # Episode number: 1-4 digits followed by the Korean counter
            # suffix in extra_info; -1 when it cannot be parsed.
            try:
                entity.episode = int(
                    re.compile(r'(?P<epi>\d{1,4})%s' % u'부').search(
                        entity.extra_info).group('epi'))
            except:
                entity.episode = -1
            entity.broadcast_info = extra_infos[-2].strip().replace(
                '&nbsp;', ' ').replace('&nbsp', ' ')
            entity.broadcast_term = extra_infos[-1].split(',')[-1].strip()

            # Year: first 4-digit run in the last span; 0 when absent.
            try:
                entity.year = re.compile(r'(?P<year>\d{4})').search(
                    extra_infos[-1]).group('year')
            except:
                entity.year = 0

            entity.desc = root.xpath(
                '//*[@id="tv_program"]/div[1]/dl[1]/dd/text()')[0]

            #logger.debug('get_show_info_on_home 1: %s', entity['status'])
            # Series
            entity.series = []

            # Seed the series list with this show itself.  The date is the
            # broadcast term when it splits into exactly two dot-separated
            # parts, otherwise just the year.
            try:
                tmp = entity.broadcast_term.split('.')
                if len(tmp) == 2:
                    entity.series.append({
                        'title': entity.title,
                        'code': entity.code,
                        'year': entity.year,
                        'status': entity.status,
                        'date': '%s.%s' % (tmp[0], tmp[1])
                    })
                else:
                    entity.series.append({
                        'title': entity.title,
                        'code': entity.code,
                        'year': entity.year,
                        'status': entity.status,
                        'date': '%s' % (entity.year)
                    })
            except Exception as exception:
                logger.debug('Not More!')
                logger.debug(traceback.format_exc())

            tags = root.xpath('//*[@id="tv_series"]/div/ul/li')

            if tags:
                # 2019-03-05: when a "more series" link exists, fetch that
                # page and use its list items instead.
                try:
                    more = root.xpath('//*[@id="tv_series"]/div/div/a')
                    if more:
                        url = more[0].attrib['href']
                        if not url.startswith('http'):
                            url = 'https://search.daum.net/search%s' % url
                        #logger.debug('MORE URL : %s', url)
                        if more[0].xpath('span')[0].text == u'시리즈 더보기':
                            #more_root = HTML.ElementFromURL(url)
                            more_root = SiteUtil.get_tree(
                                url,
                                proxy_url=SystemModelSetting.get(
                                    'site_daum_proxy'),
                                headers=cls.default_headers,
                                cookies=SystemLogicSite.get_daum_cookies())
                            tags = more_root.xpath('//*[@id="series"]/ul/li')
                except Exception as exception:
                    logger.debug('Not More!')
                    logger.debug(traceback.format_exc())

                find_1900 = False
                for tag in tags:
                    dic = {}
                    dic['title'] = tag.xpath('a')[0].text
                    #logger.debug(dic['title'])
                    dic['code'] = cls.module_char + cls.site_char + re.compile(
                        r'irk\=(?P<id>\d+)').search(
                            tag.xpath('a')[0].attrib['href']).group('id')
                    if tag.xpath('span'):
                        # The year may be missing.
                        dic['date'] = tag.xpath('span')[0].text
                        if dic['date'] is None:
                            dic['date'] = '1900'
                            find_1900 = True
                        else:
                            dic['year'] = re.compile(
                                r'(?P<year>\d{4})').search(
                                    dic['date']).group('year')
                    else:
                        dic['year'] = None
                    # NOTE(review): entries built without a <span> have no
                    # 'date' key, and entries with an empty span have no
                    # 'year' key; the date-based sort below reads
                    # item['date'] -- confirm a KeyError cannot occur there.
                    entity.series.append(dic)
                # A later season sometimes has a smaller code (original
                # example: CSI Las Vegas).
                # 2021-03-29: further case noted for another show.
                # With a placeholder date or an unknown year, order purely by
                # the numeric part of the code.
                if find_1900 or entity.year == 0:
                    entity.series = sorted(entity.series,
                                           key=lambda k: int(k['code'][2:]))
                else:
                    # 2021-06-06 (Penthouse 3): season 2 is listed as 2021.2
                    # while season 3 is listed only as 2021, so the old
                    # "append 00" scheme sorted season 3 first.
                    # Within the same year, order by code instead.
                    # The string literal below is the previous sort
                    # implementation, kept as a no-op expression.
                    """
                    for item in entity.series:
                        tmp = item['date'].split('.')
                        if len(tmp) == 2:
                            item['sort_value'] = int('%s%s' % (tmp[0],tmp[1].zfill(2)))
                        elif len(tmp) == 1:
                            item['sort_value'] = int('%s00' % tmp[0])
                    entity.series = sorted(entity.series, key=lambda k: k['sort_value'])
                    """
                    # Both branches use only the first dot-separated part of
                    # the date; dates with more than two parts get no
                    # sort_value at all.
                    for item in entity.series:
                        tmp = item['date'].split('.')
                        if len(tmp) == 2:
                            item['sort_value'] = int(tmp[0])
                        elif len(tmp) == 1:
                            item['sort_value'] = int(tmp[0])
                    entity.series = sorted(
                        entity.series,
                        key=lambda k: (k['sort_value'], int(k['code'][2:])))

            # Same-name titles
            entity.equal_name = []
            tags = root.xpath(
                u'//div[@id="tv_program"]//dt[contains(text(),"동명 콘텐츠")]//following-sibling::dd'
            )
            if tags:
                tags = tags[0].xpath('*')
                # Children are walked in order: an <a> starts a new entry, a
                # "(studio, year)" span adds details to it, and a
                # "(same-name program)" span appends it.  A
                # "(same-name episode)" span is skipped.
                # NOTE(review): `dic` is only rebound in the <a> branch; a
                # span seen before any <a> would reuse the last series dict
                # or fail if none exists -- confirm the markup order.
                for tag in tags:
                    if tag.tag == 'a':
                        dic = {}
                        dic['title'] = tag.text
                        dic['code'] = cls.module_char + cls.site_char + re.compile(
                            r'irk\=(?P<id>\d+)').search(
                                tag.attrib['href']).group('id')
                    elif tag.tag == 'span':
                        match = re.compile(
                            r'\((?P<studio>.*?),\s*(?P<year>\d{4})?\)').search(
                                tag.text)
                        if match:
                            dic['studio'] = match.group('studio')
                            dic['year'] = match.group('year')
                        elif tag.text == u'(동명프로그램)':
                            entity.equal_name.append(dic)
                        elif tag.text == u'(동명회차)':
                            continue
            #logger.debug(entity)
            return entity.as_dict()
        except Exception as exception:
            # NOTE(review): the message names get_show_info_by_html although
            # this is get_show_info_on_home -- likely a copy/paste leftover.
            logger.debug('Exception get_show_info_by_html : %s', exception)
            logger.debug(traceback.format_exc())
Пример #11
0
 def process_image_url(cls, url):
     """Return a directly usable image URL for *url*.

     Handles, in order:

     * a thumbnail-proxy URL with exactly one ``fname=`` marker: the
       percent-decoded text after the marker is returned;
     * a protocol-relative URL (``//host/path``): ``https:`` is prepended.
       The previous code prepended ``https`` without the colon, which
       produced ``https//host/path``;
     * a URL that already starts with ``http://`` or ``https://``: returned
       unchanged instead of being given a second scheme prefix;
     * anything else: the legacy ``'https' + url`` result, kept so other
       input shapes behave exactly as before.
     """
     parts = url.split('fname=')
     if len(parts) == 2:
         return py_urllib.unquote(parts[1])
     if url.startswith('//'):
         return 'https:' + url
     if url.startswith(('http://', 'https://')):
         return url
     # Unknown shape: preserve the historical behaviour.
     return 'https' + url
Пример #12
0
 def normalize(self, str):
     """Return *str* in canonical percent-encoded form.

     The value is decoded first, so input that is already encoded is not
     encoded twice.  It is then re-encoded with an empty set of safe
     characters, which means ``/`` is escaped as well.
     """
     decoded = py_urllib.unquote(str)
     return py_urllib.quote(decoded, safe='')