def get_translated_tag(cls, type, tag):
    tags_json = os.path.join(os.path.dirname(__file__), 'tags.json')
    with open(tags_json, 'r', encoding='utf8') as f:
        tags = json.load(f)
    if type in tags:
        if tag in tags[type]:
            res = tags[type][tag]
        else:
            trans_text = SystemLogicTrans.trans(tag, source='ja', target='ko').strip()
            # logger.debug(f'tag translation: {tag} - {trans_text}')
            if cls.is_include_hangul(trans_text) or trans_text.replace(' ', '').isalnum():
                tags[type][tag] = trans_text
                with open(tags_json, 'w', encoding='utf8') as f:
                    json.dump(tags, f, indent=4, ensure_ascii=False)
                res = tags[type][tag]
            else:
                res = tag
        return res
    else:
        return tag
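# Illustrative snippet (not part of the method above): the tags.json cache that
# get_translated_tag() consults is a two-level dict keyed by tag type and then by
# the original Japanese tag. The sample content below is hypothetical.
import json

sample_tags = {'genre': {'素人': '아마추어'}}
print(json.dumps(sample_tags, ensure_ascii=False, indent=4))
# A cache miss calls SystemLogicTrans.trans() and, when the result contains Hangul
# or is plain alphanumeric, writes it back into tags.json for reuse.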
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        if re.search(r'(\d{6}[_-]\d+)', keyword, re.I) is not None:
            code = re.search(r'(\d{6}[_-]\d+)', keyword, re.I).group().replace('-', '_')
        else:
            # logger.debug(f'invalid keyword: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'invalid keyword'
            return ret
        proxies = {'http': proxy_url, 'https': proxy_url}
        url = f'{cls.site_base_url}/dyn/phpauto/movie_details/movie_id/{code}.json'
        response = None
        try:
            response = requests.get(url, proxies=proxies)
            json_data = response.json()
        except Exception:
            # logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = response.status_code if response is not None else 'not found'
            return ret
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        item.code = cls.module_char + cls.site_char + code
        item.title = item.title_ko = json_data['Title']
        item.year = json_data['Year']
        item.image_url = json_data['MovieThumb']
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'1pon-{code}'
        if '1pon' in keyword.lower():
            item.score = 100
        else:
            item.score = 90
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
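# Illustrative snippet: the 1pondo keyword pattern used above is six digits, then
# '-' or '_', then the daily number; a match is normalized to the underscore form
# before building the movie_details JSON URL. The sample keywords are made up.
import re

for kw in ('1pon-010121_001', 'some.release.010121-001'):
    m = re.search(r'(\d{6}[_-]\d+)', kw, re.I)
    print(kw, '->', m.group().replace('-', '_') if m else 'invalid keyword')
# both print '010121_001'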
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        keyword = keyword.strip().lower()
        url = f'{cls.site_base_url}/search?kw={keyword}'
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        if SiteUtil.get_response(url).status_code in (404, 500):
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        elif tree.xpath('/html/head/meta[@property="og:url"]/@content')[0] == 'https://fc2hub.com/search':
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        item.code = cls.module_char + cls.site_char + tree.xpath('//*[@id="content"]//h1[@class="card-title fc2-id"]/text()')[0].split('-')[2]
        # no portrait poster available
        item.image_url = tree.xpath('//a[@data-fancybox="gallery"]/@href')[0]
        item.title = tree.xpath('//*[@id="content"]//h1[@class="card-text fc2-title"]/text()')[0].strip()
        # not exact; needs better handling later
        item.year = parse(tree.xpath('/html/head/meta[@property="videos:published_time"]/@content')[0]).date().year
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'FC2-{item.code[2:]}'
        # score calculation still needs work
        # fc2hub has no exact date, so 90
        item.score = 90
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {'data': []}
        keyword = keyword.strip().lower()
        # 2020-06-24
        if keyword[-3:-1] == 'cd':
            keyword = keyword[:-3]
        keyword = keyword.replace(' ', '-')
        url = '{site_base_url}/search/{keyword}'.format(site_base_url=cls.site_base_url, keyword=keyword)
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        #lists = tree.xpath('//*[@id="waterfall"]/div')
        lists = tree.xpath('//a[@class="movie-box"]')
        for node in lists:
            try:
                item = EntityAVSearch(cls.site_name)
                tag = node.xpath('.//img')[0]
                item.image_url = tag.attrib['src'].lower()
                if manual == True:
                    if image_mode == '3':
                        image_mode = '0'
                    item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
                """
                tmp = SiteUtil.discord_proxy_get_target(item.image_url)
                if tmp is None:
                    item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
                else:
                    item.image_url = tmp
                """
                tag = node.xpath('.//date')
                item.ui_code = tag[0].text_content().strip()
                item.code = cls.module_char + cls.site_char + node.attrib['href'].split('/')[-1]
                item.desc = u'발매일 : ' + tag[1].text_content().strip()
                item.year = int(tag[1].text_content().strip()[:4])
                item.title = item.title_ko = node.xpath('.//span/text()')[0].strip()
                if do_trans:
                    item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
                item.score = 100 if keyword.lower() == item.ui_code.lower() else 60 - (len(ret['data']) * 10)
                if item.score < 0:
                    item.score = 0
                #logger.debug(item)
                ret['data'].append(item.as_dict())
            except Exception as exception:
                logger.error('Exception:%s', exception)
                logger.error(traceback.format_exc())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
        return ret
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
        return ret
def process_actor_image(cls, tmdb, show):
    try:
        tmdb_actor = tmdb.credits(language='en')
        for tmdb_item in tmdb_actor['cast']:
            if tmdb_item['profile_path'] is None:
                continue
            kor_name = SystemLogicTrans.trans(tmdb_item['name'], source='en', target='ko')
            #kor_name = MetadataServerUtil.trans_en_to_ko(tmdb_item['name'])
            flag_find = False
            #logger.debug(tmdb_item)
            for actor in show['actor']:
                if actor['name'] == kor_name:
                    flag_find = True
                    actor['thumb'] = 'https://image.tmdb.org/t/p/original' + tmdb_item['profile_path']
                    break
            if flag_find == False:
                kor_role_name = SystemLogicTrans.trans(tmdb_item['character'], source='en', target='ko')
                #kor_role_name = MetadataServerUtil.trans_en_to_ko(tmdb_item['character'])
                for actor in show['actor']:
                    if actor['role'] == kor_role_name:
                        flag_find = True
                        actor['thumb'] = 'https://image.tmdb.org/t/p/original' + tmdb_item['profile_path']
                        break
            #if flag_find == False:
            #    logger.debug(kor_name)
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
def _set_info(tree, ret, path_str, info):
    ret[info] = ''
    ret['%s_ko' % info] = ''
    try:
        tag = tree.xpath(path_str)
        if tag:
            ret[info] = tag[0].text_content().strip()
            if info == 'studio':
                if ret[info] in _studio:
                    ret['studio_ko'] = _studio[ret['studio']]
                    return ret
            ret['%s_ko' % info] = SystemLogicTrans.trans(ret[info])
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
    return ret
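# Illustrative snippet: _set_info() follows a lookup-then-translate pattern for the
# studio field; curated names come from the _studio mapping and anything else is
# machine-translated. The mapping below is a made-up stand-in for _studio.
_studio_sample = {'SODクリエイト': 'SOD크리에이트'}

def studio_to_ko(name, translate=lambda text: text):
    # return the curated Korean name when known, otherwise translate on the fly
    return _studio_sample.get(name, translate(name))

print(studio_to_ko('SODクリエイト'))   # SOD크리에이트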
def info_actor(cls, tmdb, entity, primary=True, kor_trans=True):
    try:
        info = tmdb.credits(language='en')
        trans = False
        if kor_trans and ((len(entity.country) > 0 and entity.country[0] in ['South Korea', u'한국', u'대한민국']) or (entity.extra_info['original_language'] == 'ko')):
            trans = True
        #trans = True  # Korean cast members are auto-translated
        if primary:
            logger.debug(len(info['cast']))
            for tmdb_item in info['cast'][:20]:
                name = tmdb_item['original_name']
                #logger.debug(tmdb_item)
                try:
                    if SiteUtil.is_include_hangul(tmdb_item['original_name']) == False:
                        people_info = tmdbsimple.People(tmdb_item['credit_id']).info()
                        for tmp in people_info['also_known_as']:
                            if SiteUtil.is_include_hangul(tmp):
                                name = tmp
                                break
                except Exception:
                    pass
                actor = EntityActor('', site=cls.site_name)
                actor.name = SystemLogicTrans.trans(name, source='en', target='ko').replace(' ', '') if trans else name
                actor.role = SystemLogicTrans.trans(tmdb_item['character'], source='en', target='ko').replace(' ', '') if trans else tmdb_item['character']
                if tmdb_item['profile_path'] is not None:
                    actor.thumb = 'https://image.tmdb.org/t/p/original' + tmdb_item['profile_path']
                entity.actor.append(actor)
            for tmdb_item in info['crew'][:20]:
                if tmdb_item['job'] == 'Director':
                    entity.director.append(SystemLogicTrans.trans(tmdb_item['original_name'], source='en', target='ko').replace(' ', '') if trans else tmdb_item['original_name'])
                if tmdb_item['job'] == 'Executive Producer':
                    entity.producers.append(SystemLogicTrans.trans(tmdb_item['original_name'], source='en', target='ko').replace(' ', '') if trans else tmdb_item['original_name'])
                if tmdb_item['job'] == 'Producer':
                    entity.producers.append(SystemLogicTrans.trans(tmdb_item['original_name'], source='en', target='ko').replace(' ', '') if trans else tmdb_item['original_name'])
                if tmdb_item['job'] in ['Writer', 'Novel', 'Screenplay']:
                    entity.credits.append(SystemLogicTrans.trans(tmdb_item['original_name'], source='en', target='ko').replace(' ', '') if trans else tmdb_item['original_name'])
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        keyword = keyword.strip().lower()
        url = f'{cls.site_base_url}{keyword}/'
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        if SiteUtil.get_response(url).status_code in (404, 410):
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        item.code = cls.module_char + cls.site_char + keyword
        item.title = item.title_ko = tree.xpath('//div[@class="my__product__detail__title notranslate"]/text()')[0]
        item.image_url = tree.xpath('//div[@class="my__product__image lazyload"]/@data-bg')[0]
        item.year = parse(tree.xpath('//div[@class="my__product__spec"]/text()')[1]).date().year
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'FC2-{keyword}'
        # score calculation still needs work
        item.score = 100
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        if re.search(r'(\d{4})', keyword, re.I) is not None and 'heyzo' in keyword.lower():
            code = re.search(r'(\d{4})', keyword, re.I).group()
        else:
            # logger.debug(f'invalid keyword: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'invalid keyword'
            return ret
        url = f'{cls.site_base_url}/moviepages/{code}/index.html'
        if SiteUtil.get_response(url, proxy_url=proxy_url).status_code == 404:
            # logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        item.code = cls.module_char + cls.site_char + code
        # the desktop page may or may not embed JSON-LD; fall back to the mobile page
        tmp = {}
        try:
            json_data = json.loads(
                re.sub(r'(\"\")\w.', '\"', tree.xpath('//*[@id="movie"]/script[@type="application/ld+json"]/text()')[0]),
                strict=False)
            tmp['title'] = unicodedata.normalize('NFKC', json_data['name'])
            tmp['year'] = parse(json_data['dateCreated']).date().year
            tmp['image_url'] = f'https:{json_data["image"]}'
        except Exception:
            m_tree = SiteUtil.get_tree(url.replace('www.', 'm.'), proxy_url=proxy_url)
            tmp['title'] = m_tree.xpath('//div[@id="container"]/h1/text()')[0].strip()
            tmp['year'] = parse(m_tree.xpath('//*[@id="moviedetail"]/div[2]/span/text()')[1].strip()).date().year
            tmp['image_url'] = f'https://m.heyzo.com/contents/3000/{code}/images/player_thumbnail.jpg'
        item.title = item.title_ko = tmp['title']
        item.year = tmp['year']
        item.image_url = tmp['image_url']
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'HEYZO-{code}'
        if 'heyzo' in keyword.lower():
            item.score = 100
        else:
            item.score = 90
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    from metadata import P as MetadataPlugin
    MetadataModelSetting = MetadataPlugin.ModelSetting
    javdb_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cookie': f'locale=en; over18=1; _jdb_session={MetadataModelSetting.get("jav_fc2_javdb_jdbsession")};',
    }
    try:
        ret = {}
        keyword = keyword.strip().lower()
        url = f'{MetadataModelSetting.get("jav_fc2_javdb_url")}/search?q={keyword}'
        if MetadataModelSetting.get('jav_fc2_javdb_jdbsession') == '':
            raise Exception('jdbsession required')
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url, headers=javdb_headers)
        ret = {'data': []}
        search_result = zip(
            tree.xpath('//*[@id="videos"]/div/div/a/div[2]/text()'),
            tree.xpath('//*[@id="videos"]/div/div/a/div[4]/text()'),
            tree.xpath('//*[@id="videos"]/div/div/a/@href'),
            tree.xpath('//*[@id="videos"]/div/div/a/div[1]/img/@data-src'),
            tree.xpath('//*[@id="videos"]/div/div[1]/a/div[3]/text()'))
        item = EntityAVSearch(cls.site_name)
        javdb_code = ''
        for result, date, url, thumburl, summary in search_result:
            if result.find('FC2-' + keyword) >= 0:
                javdb_code = url.split('/')[2]
                item.code = cls.module_char + cls.site_char + javdb_code
                item.title = item.title_ko = summary
                item.image_url = thumburl
                if manual == True:
                    if image_mode == '3':
                        image_mode = '0'
                    item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
                try:
                    item.year = parse(date.strip()).date().year
                except Exception:
                    pass
                break
        if javdb_code == '':
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'FC2-{keyword}'
        # score calculation still needs work
        item.score = 100
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        keyword = keyword.strip().lower()
        url = f'{cls.site_base_url}/?p={keyword}&nc=0'
        if SiteUtil.get_response(url).status_code == 404:
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        elif SiteUtil.get_response(url).status_code == 403:
            logger.debug('fc2cm 403 error')
            ret['ret'] = 'failed'
            ret['data'] = 'fc2cm 403'
            return ret
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url, headers=cls.headers)
        if tree.xpath('/html/head/title/text()')[0] == '404':
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        elif tree.xpath('//*[@id="contentInner"]/main/article/aside/div/div/h1/text()') != []:
            if tree.xpath('//*[@id="contentInner"]/main/article/aside/div/div/h1/text()')[0] == ' Hello! my name is 404 ':
                logger.debug(f'not found: {keyword}')
                ret['ret'] = 'failed'
                ret['data'] = 'not found'
                return ret
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        for tr in tree.xpath('//*[@id="contentInner"]/main/article/aside/div/div/table/tr'):
            if tr.xpath('.//td//text()')[0] == '商品ID':
                result_codename = tr.xpath('.//td//text()')[2]
                item.code = cls.module_char + cls.site_char + result_codename
            if tr.xpath('.//td//text()')[0] == '販売日':
                item.year = parse(tr.xpath('.//td//text()')[2]).date().year
        image_src = tree.xpath('//*[@id="contentInner"]/main/article/aside/div/div/a/img/@data-src')[0]
        item.image_url = 'https:' + image_src if image_src.startswith('//') else image_src
        item.title = re.sub(r'(FC2 PPV \d{6,7})|(FC2-PPV-\d{6,7})', '', tree.xpath('//*[@id="contentInner"]/main/article/aside/div/div/h1/a/text()')[0]).strip()
        # logger.debug(manual)
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'FC2-{result_codename}'
        # score calculation still needs work
        item.score = 100
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
def _search(cls, module_char, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        keyword = keyword.strip().lower()
        if keyword[-3:-1] == 'cd':
            keyword = keyword[:-3]
        keyword = keyword.replace(' ', '-')
        # &is_dvd_product=1&type=dvd
        # &is_dvd_product=0&type=haishin
        if module_char == 'C':
            module_query = '&is_dvd_product=1&type=dvd'
        elif module_char == 'D':
            module_query = '&is_dvd_product=0&type=haishin'
        url = '{site_base_url}/search/cSearch.php?search_word={keyword}&x=0&y=0{module_query}'.format(site_base_url=cls.site_base_url, keyword=keyword, module_query=module_query)
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url, headers=cls.headers)
        lists = tree.xpath('//*[@id="center_column"]/div[2]/div/ul/li')
        ret = {'data': []}
        score = 60
        logger.debug('mgs search len lists2 :%s', len(lists))
        if len(lists) > 10:
            lists = lists[:10]
        for node in lists:
            try:
                item = EntityAVSearch(cls.site_name)
                tag = node.xpath('.//a')[0]
                href = tag.attrib['href'].lower()
                #logger.debug(href)
                match = re.compile(r'\/product_detail\/(?P<code>.*?)\/').search(href)
                if match:
                    item.code = cls.module_char + cls.site_char + match.group('code').upper()
                already_exist = False
                for exist_item in ret['data']:
                    if exist_item['code'] == item.code:
                        already_exist = True
                        break
                if already_exist:
                    continue
                tag = node.xpath('.//img')[0]
                item.image_url = tag.attrib['src']
                tag = node.xpath('.//p[@class="title lineclamp"]')[0]
                item.title = item.title_ko = tag.text_content().strip()
                # tmp = SiteUtil.discord_proxy_get_target(item.image_url)
                # 2021-03-22: search results no longer use fixed discord URLs (mode 3).
                # When manual == False no image processing is needed at all; the
                # "find exact match" screen must show the image, but mode 3 is not applied there.
                if manual == True:
                    if image_mode == '3':
                        image_mode = '0'
                    item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
                if do_trans:
                    item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
                match = re.compile(r'^(h_)?\d*(?P<real>[a-zA-Z]+)(?P<no>\d+)([a-zA-Z]+)?$').search(item.code[2:])
                if match:
                    item.ui_code = '%s-%s' % (match.group('real'), match.group('no'))
                else:
                    item.ui_code = item.code[2:]
                item.score = 100 if item.ui_code.lower() == keyword.lower() else 60 - (len(ret['data']) * 10)
                item.score = 0 if item.score < 0 else item.score
                ret['data'].append(item.as_dict())
            except Exception as exception:
                logger.error('Exception:%s', exception)
                logger.error(traceback.format_exc())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
        return ret
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
        return ret
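# Illustrative snippet: the ui_code normalization used in _search() above drops the
# numeric label prefix (and an optional 'h_') and splits the alphabetic series from
# the number. The sample codes are hypothetical.
import re

pattern = re.compile(r'^(h_)?\d*(?P<real>[a-zA-Z]+)(?P<no>\d+)([a-zA-Z]+)?$')
for code in ('SIRO1234', '300MIUM123'):
    m = pattern.search(code)
    print(code, '->', '%s-%s' % (m.group('real'), m.group('no')) if m else code)
# SIRO1234 -> SIRO-1234, 300MIUM123 -> MIUM-123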
def javdb_update(arg, retry=0):
    try:
        from . import Vars
        url = 'https://javdb.com/v/%s' % arg
        page = _session.get(url, headers=_headers, proxies=Vars.proxies)
        data = page.text
        data = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' + data
        tree = html.fromstring(data)
        #logger.debug(data)
        ret = {}
        insert_code = ''
        # sample images need updating
        DIV_INDEX = 3
        DIV_BASE = '/html/body/section/div/div[%s]/div'
        javdb_base_18 = DIV_BASE % DIV_INDEX
        #base = javdb_base_18 + '/div[2]/nav'
        base = '//nav[@class="panel video-panel-info"]/div'
        #base_full = javdb_base_18 + '/div[1]/a/img'
        base_full = '//div[@class="column column-video-cover"]/a/img'
        #SAMPLE_TAG = DIV_BASE % (DIV_INDEX+1) + '/article/div/div'
        SAMPLE_TAG = '//div[@class="tile-images preview-images"]'
        #tag = tree.xpath('{base}/div[1]/span[2]'.format(base=base))[0]
        ret['code'] = ''
        ret['date'] = ''
        ret['running_time'] = ''
        ret['director'] = ''
        ret['director_ko'] = ''
        ret['studio'] = ''
        ret['studio_ko'] = ''
        ret['label'] = ''
        ret['label_ko'] = ''
        ret['series'] = ''
        ret['series_ko'] = ''
        ret['genre'] = []
        ret['performer'] = []
        # 2020-03-28: markup changed from two span tags to strong + span
        #tags = tree.xpath('{base}/div'.format(base=base))
        # 2020-10-30
        tags = tree.xpath(base)
        # 2020-06-01: as with the search, too many requests in a short period make this fail
        logger.debug('tags :%s', len(tags))
        if len(tags) == 0:
            # error: retry a few times before giving up
            if retry < 5:
                logger.debug('JAVDB UPDATE RETRY : %s', arg)
                return javdb_update(arg, retry+1)
            else:
                #logger.debug(data)
                logger.debug('JAVDB UPDATE CRITICAL : %s', arg)
                return
        for tag in tags:
            #logger.debug(tag.text_content().strip())
            #tmp = tag.xpath('span')
            #if tmp is None or len(tmp) < 2:
            #    break
            #label = tmp[0].text_content().strip()
            #value = tmp[1].text_content().strip()
            if not tag.xpath('strong') or not tag.xpath('span'):
                break
            label = tag.xpath('strong')[0].text_content().strip()
            value = tag.xpath('span')[0].text_content().strip()
            if label == '番號:':
                ret['code'] = value
            elif label == '時間:' or label == '日期:':
                ret['date'] = value.replace('-', '')
            elif label == '時長:':
                ret['running_time'] = value.split(' ')[0].strip()
            elif label == '導演:':
                if value.replace(' ', '') != 'N/A':
                    ret['director'] = value
                    ret['director_ko'] = SystemLogicTrans.trans(ret['director'])
            elif label == '片商:':
                if value.replace(' ', '') != 'N/A':
                    ret['studio'] = value
                    if ret['studio'] in _studio:
                        ret['studio_ko'] = _studio[ret['studio']]
                    elif ret['studio'] == 'カリビアンコム':
                        ret['studio_ko'] = insert_code = 'Carib'
                    elif ret['studio'] == 'pacopacomama':
                        ret['studio_ko'] = insert_code = 'paco'
                    elif ret['studio'] == '一本道':
                        ret['studio_ko'] = insert_code = '1pondo'
                    elif ret['studio'] == '10musume':
                        ret['studio_ko'] = insert_code = '10mu'
                    elif ret['studio'] == 'Tokyo-Hot':
                        ret['studio_ko'] = insert_code = 'Tokyo-Hot'
                    else:
                        ret['studio_ko'] = SystemLogicTrans.trans(ret['studio'])
            elif label == '發行:':
                if value.replace(' ', '') != 'N/A':
                    ret['label'] = value
                    ret['label_ko'] = SystemLogicTrans.trans(ret['label'])
            elif label == '類別:':
                for tmp in value.split(','):
                    tmp = tmp.strip()
                    if tmp in _genre:
                        ret['genre'].append(_genre[tmp])
                        continue
                    tmp = SystemLogicTrans.trans(tmp).replace(' ', '')
                    if tmp not in ['고화질', '독점전달', '세트상품', '단체작품', '기간한정세일', '기리모자', '데지모', '슬림', '미소녀', '미유', '망상족', '거유', '에로스', '작은']:
                        ret['genre'].append(tmp)
            elif label == '演員:':
                nodes = tag.xpath('.//a')
                for node in nodes:
                    entity = {}
                    entity['id'] = ''
                    entity['name'] = node.text_content().strip()
                    entity = get_actor_info(entity)
                    ret['performer'].append(entity)
            """
            elif label == '类别:':
                nodes = tmp[1].xpath('.//a')
                for node in nodes:
                    tmp = node.text_content().strip()
                    if tmp in _genre:
                        ret['genre'].append(_genre[tmp])
                        continue
                    tmp = SystemLogicTrans.trans(tmp).replace(' ', '')
                    if tmp not in ['고화질', '독점전달', '세트상품', '단체작품', '기간한정세일', '기리모자', '데지모', '슬림', '미소녀', '미유', '망상족', '거유', '에로스', '작은']:
                        ret['genre'].append(tmp)
            """
        tag = tree.xpath('/html/body/section/div/h2/strong')[0]
        ret['title'] = tag.text_content().replace(ret['code'], '').strip()
        ret['title_ko'] = SystemLogicTrans.trans(ret['title'])
        ret['summary'] = ret['title']
        ret['summary_ko'] = ret['title_ko']
        tag = tree.xpath(base_full)[0]
        ret['poster_full'] = tag.attrib['src']
        from system.model import ModelSetting as SystemModelSetting
        ret['poster_full'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), ret['poster_full'])
        if SystemModelSetting.get_bool('auth_use_apikey'):
            ret['poster_full'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
        search_data = javdb_search(ret['code'])
        logger.debug(search_data)
        target = None
        for s in search_data:
            if s['score'] == 100:
                target = s
                break
        #if len(search_data) == 1 and search_data[0]['score'] == 100:
        if target is not None:
            ret['poster'] = target['poster']
            ret['poster'] = '%s/av_agent/api/image?url=%s' % (SystemModelSetting.get('ddns'), ret['poster'])
            if SystemModelSetting.get_bool('auth_use_apikey'):
                ret['poster'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
        else:
            ret['poster'] = ret['poster_full']
        ret['sample_image'] = []
        try:
            tag = tree.xpath(SAMPLE_TAG)
            if tag:
                tag = tag[0]
                nodes = tag.xpath('.//a')
                for node in nodes:
                    entity = {}
                    entity['full'] = node.attrib['href']
                    #logger.debug(entity['full'])
                    tag = node.xpath('.//img')[0]
                    entity['thumb'] = tag.attrib['src']
                    entity['full'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), entity['full'])
                    entity['thumb'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), entity['thumb'])
                    if SystemModelSetting.get_bool('auth_use_apikey'):
                        entity['full'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
                        entity['thumb'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
                    ret['sample_image'].append(entity)
        except Exception:
            pass
        if insert_code != '':
            insert_code += ' '
        ret['code_show'] = insert_code + ret['code']
        ret['release'] = ''
        match = re.compile(r'(?P<real>[a-zA-Z]+)-(?P<no>\d+)').match(ret['code'])
        if match:
            ret['release'] = match.group('real')
        if ret['release'] == '':
            ret['release'] = ret['studio_ko']
        ret['rating'] = '0'
        ret['result'] = 'success'
        return ret
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        return False
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        keyword = keyword.strip().lower()
        # 2020-06-24
        if keyword[-3:-1] == 'cd':
            keyword = keyword[:-3]
        keyword = keyword.replace('-', ' ')
        keyword_tmps = keyword.split(' ')
        if len(keyword_tmps) == 2:
            if len(keyword_tmps[1]) <= 5:
                dmm_keyword = '%s%s' % (keyword_tmps[0], keyword_tmps[1].zfill(5))
            elif len(keyword_tmps[1]) > 5:
                dmm_keyword = '%s%s' % (keyword_tmps[0], keyword_tmps[1])
        else:
            dmm_keyword = keyword
        logger.debug('keyword [%s] -> [%s]', keyword, dmm_keyword)
        url = '%s/digital/videoa/-/list/search/=/?searchstr=%s' % (cls.site_base_url, dmm_keyword)
        #url = '%s/search/=/?searchstr=%s' % (cls.site_base_url, dmm_keyword)
        #https://www.dmm.co.jp/search/=/searchstr=tsms00060/
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url, headers=cls.dmm_headers)
        lists = tree.xpath('//*[@id="list"]/li')
        ret = {'data': []}
        score = 60
        logger.debug('dmm search len lists2 :%s', len(lists))
        if len(lists) > 10:
            lists = lists[:10]
        for node in lists:
            try:
                item = EntityAVSearch(cls.site_name)
                tag = node.xpath('.//div/p[@class="tmb"]/a')[0]
                href = tag.attrib['href'].lower()
                match = re.compile(r'\/cid=(?P<code>.*?)\/').search(href)
                if match:
                    item.code = cls.module_char + cls.site_char + match.group('code')
                already_exist = False
                for exist_item in ret['data']:
                    if exist_item['code'] == item.code:
                        already_exist = True
                        break
                if already_exist:
                    continue
                tag = node.xpath('.//span[1]/img')[0]
                item.title = item.title_ko = tag.attrib['alt']
                item.image_url = tag.attrib['src']
                # tmp = SiteUtil.discord_proxy_get_target(item.image_url)
                # 2021-03-22: search results no longer use fixed discord URLs (mode 3).
                # When manual == False no image processing is needed at all; the
                # "find exact match" screen must show the image, but mode 3 is not applied there.
                if manual == True:
                    if image_mode == '3':
                        image_mode = '0'
                    item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
                if do_trans:
                    item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
                match = re.compile(r'^(h_)?\d*(?P<real>[a-zA-Z]+)(?P<no>\d+)([a-zA-Z]+)?$').search(item.code[2:])
                if match:
                    item.ui_code = '%s%s' % (match.group('real'), match.group('no'))
                else:
                    item.ui_code = item.code[2:]
                if len(keyword_tmps) == 2:
                    # 2019-11-20: e.g. ntr and mntr should both score 100
                    if item.ui_code == dmm_keyword:
                        item.score = 100
                    elif item.ui_code.replace('0', '') == dmm_keyword.replace('0', ''):
                        item.score = 100
                    elif item.ui_code.find(dmm_keyword) != -1:
                        # full containment, e.g. DAID => AID
                        item.score = score
                        score += -5
                    elif item.code.find(keyword_tmps[0]) != -1 and item.code.find(keyword_tmps[1]) != -1:
                        item.score = score
                        score += -5
                    elif item.code.find(keyword_tmps[0]) != -1 or item.code.find(keyword_tmps[1]) != -1:
                        item.score = 60
                    else:
                        item.score = 20
                else:
                    if item.code == keyword_tmps[0]:
                        item.score = 100
                    elif item.code.find(keyword_tmps[0]) != -1:
                        item.score = score
                        score += -5
                    else:
                        item.score = 20
                if match:
                    item.ui_code = '%s-%s' % (match.group('real').upper(), str(int(match.group('no'))).zfill(3))
                else:
                    if item.ui_code.find('0000') != -1:
                        item.ui_code = item.ui_code.replace('0000', '-00').upper()
                    else:
                        item.ui_code = item.ui_code.replace('00', '-').upper()
                    if item.ui_code.endswith('-'):
                        item.ui_code = '%s00' % (item.ui_code[:-1])
                logger.debug('score :%s %s ', item.score, item.ui_code)
                ret['data'].append(item.as_dict())
            except Exception as exception:
                logger.error('Exception:%s', exception)
                logger.error(traceback.format_exc())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
        if len(ret['data']) == 0 and len(keyword_tmps) == 2 and len(keyword_tmps[1]) == 5:
            new_title = '%s%s' % (keyword_tmps[0], keyword_tmps[1].zfill(6))
            return cls.search(new_title, do_trans=do_trans, proxy_url=proxy_url, image_mode=image_mode)
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
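# Illustrative snippet: a worked example of the DMM keyword handling above. A
# hypothetical keyword 'tsms-60' is padded to the cid form used in the search URL,
# and a matching cid is rendered back as a display code.
import re

label, num = 'tsms-60'.replace('-', ' ').split(' ')
dmm_keyword = '%s%s' % (label, num.zfill(5))                      # 'tsms00060'
m = re.search(r'^(h_)?\d*(?P<real>[a-zA-Z]+)(?P<no>\d+)([a-zA-Z]+)?$', dmm_keyword)
ui_code = '%s-%s' % (m.group('real').upper(), str(int(m.group('no'))).zfill(3))
print(dmm_keyword, ui_code)                                       # tsms00060 TSMS-060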
def dmm_update(arg, use_discord_proxy=False):
    try:
        from system.model import ModelSetting as SystemModelSetting
        from . import Vars
        url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=%s/' % arg
        page = _session.get(url, headers=_headers, proxies=Vars.proxies)
        #logger.debug(url)
        data = page.text
        tree = html.fromstring(data)
        ret = {}
        nodes = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/div[1]/div[2]')
        if not nodes:
            #logger.debug(data)
            logger.debug('CRITICAL!!!')
            return data
        #a_nodes = nodes[0].xpath('.//a')
        ret['poster_full'] = ''
        ret['poster'] = ''
        try:
            a_nodes = nodes[0].xpath('.//a')
            anodes = a_nodes
            #logger.debug(html.tostring(anodes[0]))
            tag = anodes[0].xpath('.//img')[0]
            ret['poster_full'] = a_nodes[0].attrib['href']
            ret['poster'] = tag.attrib['src']
        except Exception:
            tag = nodes[0].xpath('.//img')[0]
            ret['poster'] = tag.attrib['src']
            ret['poster_full'] = ret['poster']
        if ret['poster'] != '' and use_discord_proxy:
            ret['poster'] = '%s/av_agent/api/discord_proxy?url=%s' % (SystemModelSetting.get('ddns'), ret['poster'])
            if SystemModelSetting.get_bool('auth_use_apikey'):
                ret['poster'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
        elif ret['poster'] != '':
            ret['poster'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), ret['poster'])
            if SystemModelSetting.get_bool('auth_use_apikey'):
                ret['poster'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
        if ret['poster_full'] != '' and use_discord_proxy:
            ret['poster_full'] = '%s/av_agent/api/discord_proxy?url=%s' % (SystemModelSetting.get('ddns'), ret['poster_full'])
            if SystemModelSetting.get_bool('auth_use_apikey'):
                ret['poster_full'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
        if ret['poster_full'] != '':
            ret['poster_full'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), ret['poster_full'])
            if SystemModelSetting.get_bool('auth_use_apikey'):
                ret['poster_full'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
        ret['title'] = tag.attrib['alt']
        ret['title_ko'] = SystemLogicTrans.trans(ret['title'])
        try:
            tag = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[4]/td[2]')
            ret['date'] = tag[0].text_content().replace('/', '').strip()
        except Exception:
            ret['date'] = ''
        if len(ret['date']) != 8:
            try:
                tag = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[3]/td[2]')
                ret['date'] = tag[0].text_content().replace('/', '').strip()
            except Exception:
                ret['date'] = ''
        tag = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[5]/td[2]')
        match = re.compile(r'^(?P<time>\d+)').search(tag[0].text_content())
        if match:
            ret['running_time'] = match.group('time')
        else:
            ret['running_time'] = ''
        nodes = tree.xpath('//*[@id="performer"]/a')
        ret['performer'] = []
        for node in nodes:
            entity = {}
            match = re.compile(r'\/id=(?P<id>.*?)\/').search(node.attrib['href'])
            if match:
                entity['id'] = match.group('id')
                entity['name'] = node.text_content()
                entity = get_actor_info(entity)
                ret['performer'].append(entity)
        ret = _set_info(tree, ret, '//*[@id="mu"]/div/table//tr/td[1]/table//tr[7]/td[2]/a', 'director')
        ret = _set_info(tree, ret, '//*[@id="mu"]/div/table//tr/td[1]/table//tr[8]/td[2]/a', 'series')
        ret = _set_info(tree, ret, '//*[@id="mu"]/div/table//tr/td[1]/table//tr[9]/td[2]/a', 'studio')
        ret = _set_info(tree, ret, '//*[@id="mu"]/div/table//tr/td[1]/table//tr[10]/td[2]/a', 'label')
        # Content type: 3D. When a 3D content-type row follows the label row,
        # the genre and code row indexes shift down by one.
        tmp = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[11]/td')[0]
        if tmp.text_content().strip() == u'ジャンル:':
            current_tr_index = 11
        else:
            current_tr_index = 12
        ret['genre'] = []
        nodes = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[%s]/td[2]/a' % current_tr_index)
        for node in nodes:
            tmp = node.text_content().strip()
            #logger.debug(tmp)
            if tmp.find('30%OFF') != -1:
                continue
            if tmp in _genre:
                ret['genre'].append(_genre[tmp])
                continue
            tmp = SystemLogicTrans.trans(tmp).replace(' ', '')
            #logger.debug(tmp)
            if tmp not in ['고화질', '독점전달', '세트상품', '단체작품', '기간한정세일', '기리모자', '데지모', '슬림', '미소녀', '미유', '망상족', '거유', '에로스', '작은', '섹시']:
                ret['genre'].append(tmp)
        tag = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[%s]/td[2]' % (current_tr_index + 1))
        ret['code'] = tag[0].text_content()
        match = re.compile(r'(h_)?\d*(?P<real>[a-zA-Z]+)(?P<no>\d+)([a-zA-Z]+)?$').match(ret['code'])
        if match:
            ret['code_show'] = '%s%s' % (match.group('real'), match.group('no'))
            ret['release'] = match.group('real')
        else:
            ret['code_show'] = ret['code']
            ret['release'] = ''
        if ret['code_show'].find('0000') != -1:
            ret['code_show'] = ret['code_show'].replace('0000', '-00').upper()
        else:
            ret['code_show'] = ret['code_show'].replace('00', '-').upper()
        if ret['code_show'].endswith('-'):
            ret['code_show'] = '%s00' % (ret['code_show'][:-1])
        try:
            ret['rating'] = '0'
            tag = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/table//tr[13]/td[2]/img')
            if tag:
                match = re.compile(r'\/(?P<rating>.*?)\.gif').match(tag[0].attrib['src'])
                if match:
                    tmps = match.group('rating').split('/')
                    ret['rating'] = tmps[len(tmps) - 1].replace('_', '.')
        except Exception as exception:
            logger.error('Exception:%s', exception)
            logger.error(traceback.format_exc())
        tag = tree.xpath('//*[@id="mu"]/div/table//tr/td[1]/div[4]')
        ret['summary'] = tag[0].text_content().split('※')[0].strip()
        ret['summary_ko'] = SystemLogicTrans.trans(ret['summary'])
        nodes = tree.xpath('//*[@id="sample-image-block"]/a')
        ret['sample_image'] = []
        for node in nodes:
            entity = {}
            tag = node.xpath('.//img')
            entity['thumb'] = tag[0].attrib['src']
            entity['full'] = entity['thumb'].replace(ret['code'] + '-', ret['code'] + 'jp-')
            entity['full'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), entity['full'])
            entity['thumb'] = '%s/av_agent/api/image_proxy?url=%s' % (SystemModelSetting.get('ddns'), entity['thumb'])
            if SystemModelSetting.get_bool('auth_use_apikey'):
                entity['full'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
                entity['thumb'] += '&apikey=%s' % SystemModelSetting.get('auth_apikey')
            ret['sample_image'].append(entity)
        ret['result'] = 'success'
        return ret
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        return False
def dmm_search(keyword, do_trans=True):
    try:
        keyword = keyword.strip().lower()
        # 2020-06-24
        if keyword[-3:-1] == 'cd':
            keyword = keyword[:-3]
        keyword = keyword.replace('-', ' ')
        tmps = keyword.split(' ')
        if len(tmps) == 2:
            if len(tmps[1]) <= 5:
                title = '%s%s' % (tmps[0], tmps[1].zfill(5))
            elif len(tmps[1]) > 5:
                title = '%s%s' % (tmps[0], tmps[1])
        else:
            title = keyword
        logger.debug('keyword %s -> %s', keyword, title)
        #url = 'https://www.dmm.co.jp/search/=/searchstr=%s/n1=FgRCTw9VBA4GAVhfWkIHWw__/' % title
        # 2020-06-14: the n1 value was being treated as a foreign region; replaced with the value confirmed over VPN
        #url = 'https://www.dmm.co.jp/search/=/searchstr=%s/n1=FgRCTw9VBA4GFUFV/' % title
        #url = 'https://www.dmm.co.jp/search/=/searchstr=%s/analyze=V1ECCVYAUAQ_/n1=FgRCTw9VBA4GFUFV' % title
        url = 'https://www.dmm.co.jp/digital/videoa/-/list/search/=/?searchstr=%s' % title
        logger.debug(url)
        page = _session.get(url, headers=_headers, proxies=Vars.proxies)
        data = page.text
        logger.debug('text len : %s', len(data))
        tree = html.fromstring(data)
        lists = tree.xpath('//*[@id="list"]/li')
        #tmp = tree.xpath('//*[@id="list"]')[0]
        #lists = tmp.xpath('li')
        #logger.debug('dmm search count:%s' % len(lists))
        ret = []
        score = 60
        logger.debug('len lists2 :%s', len(lists))
        for node in lists:
            try:
                #//*[@id="list"]/li/div/p[2]/a
                entity = {'meta': 'dmm'}
                #logger.debug(node.xpath('.//div')[0].text_content())
                #logger.debug(node.xpath('.//div/p[2]')[0].text_content())
                #logger.debug(node.xpath('.//div/p[2]/a')[0].text_content())
                tag = node.xpath('.//div/p[@class="tmb"]/a')[0]
                href = tag.attrib['href'].lower()
                match = re.compile(r'\/cid=(?P<code>.*?)\/').search(href)
                if match:
                    entity['id'] = match.group('code')
                already_exist = False
                for exist_item in ret:
                    if exist_item['id'] == entity['id']:
                        already_exist = True
                        break
                if already_exist:
                    continue
                #logger.debug(entity['id'])
                tag = node.xpath('.//span[1]/img')[0]
                entity['title'] = tag.attrib['alt']
                entity['title_ko'] = SystemLogicTrans.trans(entity['title']) if do_trans else entity['title']
                match = re.compile(r'(h_)?\d*(?P<real>[a-zA-Z]+)(?P<no>\d+)([a-zA-Z]+)?$').search(entity['id'])
                if match:
                    entity['id_show'] = '%s%s' % (match.group('real'), match.group('no'))
                else:
                    entity['id_show'] = entity['id']
                #logger.debug(entity['id_show'])
                #logger.debug(title)
                if len(tmps) == 2:
                    # 2019-11-20: e.g. ntr and mntr should both score 100
                    if entity['id_show'] == title:
                        entity['score'] = 100
                    elif entity['id_show'].replace('0', '') == title.replace('0', ''):
                        entity['score'] = 100
                    elif entity['id_show'].find(title) != -1:
                        # full containment, e.g. DAID => AID
                        entity['score'] = score
                        score += -5
                    elif entity['id'].find(tmps[0]) != -1 and entity['id'].find(tmps[1]) != -1:
                        entity['score'] = score
                        score += -5
                    elif entity['id'].find(tmps[0]) != -1 or entity['id'].find(tmps[1]) != -1:
                        entity['score'] = 60
                    else:
                        entity['score'] = 20
                else:
                    if entity['id'] == tmps[0]:
                        entity['score'] = 100
                    elif entity['id'].find(tmps[0]) != -1:
                        entity['score'] = score
                        score += -5
                    else:
                        entity['score'] = 20
                if entity['id_show'].find('0000') != -1:
                    entity['id_show'] = entity['id_show'].replace('0000', '-00').upper()
                else:
                    entity['id_show'] = entity['id_show'].replace('00', '-').upper()
                if entity['id_show'].endswith('-'):
                    entity['id_show'] = '%s00' % (entity['id_show'][:-1])
                logger.debug('score :%s %s ', entity['score'], entity['id_show'])
                ret.append(entity)
            except Exception as exception:
                logger.error('Exception:%s', exception)
                logger.error(traceback.format_exc())
        ret = sorted(ret, key=lambda k: k['score'], reverse=True)
        if len(ret) == 0 and len(tmps) == 2 and len(tmps[1]) == 5:
            new_title = '%s%s' % (tmps[0], tmps[1].zfill(6))
            return dmm_search(new_title)
        else:
            return ret
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        return False
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        if re.search(r'(\d{6}-\d{3})', keyword, re.I) is not None:
            code = re.search(r'(\d{6}-\d{3})', keyword, re.I).group()
        else:
            # logger.debug(f'invalid keyword: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'invalid keyword'
            return ret
        url = f'{cls.site_base_url}/moviepages/{code}/index.html'
        if SiteUtil.get_response(url, proxy_url=proxy_url).status_code == 404:
            # logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        item.code = cls.module_char + cls.site_char + code
        item.title = item.title_ko = tree.xpath('//div[@id="moviepages"]//h1[@itemprop="name"]/text()')[0].strip()
        item.year = parse(tree.xpath('//div[@class="movie-info section"]//li[@class="movie-spec"]/span[@itemprop="uploadDate"]/text()')[0].strip()).date().year
        item.image_url = f'https://www.caribbeancom.com/moviepages/{code}/images/l_l.jpg'
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'carib-{code}'
        if 'carib' in keyword.lower():
            item.score = 100
        else:
            item.score = 90
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
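# Illustrative snippet: the Caribbeancom code pattern above is a six-digit release
# date, a dash, and a three-digit daily number. The sample keyword is made up.
import re

m = re.search(r'(\d{6}-\d{3})', 'carib-010121-001-1080p')
print(m.group() if m else 'invalid keyword')   # 010121-001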
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    try:
        ret = {}
        keyword = keyword.strip().lower()
        url = f'{cls.site_base_url}/{keyword}/'
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        ret = {'data': []}
        if tree.xpath('/html/head/title/text()')[0] == 'お探しの商品が見つかりません':
            logger.debug(f'not found: {keyword}')
            logger.debug('trying google cache')
            cache = cls.search_cache(url)
            if cache is not None:
                tree = cache
            else:
                ret['ret'] = 'failed'
                ret['data'] = 'not found'
                return ret
        item = EntityAVSearch(cls.site_name)
        item.code = cls.module_char + cls.site_char + keyword
        item.title = item.title_ko = tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0].strip()
        item.year = re.search(r'\d{4}/\d{2}/\d{2}', tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/div[2]/p/text()')[0]).group(0).split('/')[0]
        item.image_url = 'https:' + tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')[0]
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'FC2-{keyword}'
        # score calculation still needs work
        item.score = 100
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
def search(cls, keyword, do_trans=True, proxy_url=None, image_mode='0', manual=False):
    from metadata import P as MetadataPlugin
    MetadataModelSetting = MetadataPlugin.ModelSetting
    try:
        ret = {}
        keyword = keyword.strip().lower()
        site_base_url = MetadataModelSetting.get('jav_fc2_7mmtv_url')
        url = f'{site_base_url}/ko/uncensored_search/all/{keyword}/1.html'
        tree = SiteUtil.get_tree(url, proxy_url=proxy_url)
        ret = {'data': []}
        item = EntityAVSearch(cls.site_name)
        if tree.xpath('//div[@class="latest-korean-box-row"]'):
            search_result = zip(
                tree.xpath('/html/body/section[2]/div/article/div/div/div[1]/div/div/div[2]/a/h2/text()'),
                tree.xpath('/html/body/section[2]/div/article/div/div/div[1]/div/div/div[2]/a/@href'),
                tree.xpath('/html/body/section[2]/div/article/div/div/div[1]/div/div/div[1]/a/img/@src'))
            for search_title, url, thumb in search_result:
                if keyword in search_title:
                    item.title = item.title_ko = re.sub(r'(\[?FC2-?PPV-? ?\d{6,7}\]?)', '', search_title, flags=re.I).strip()
                    item.code = cls.module_char + cls.site_char + url.split('/')[5]
                    item.image_url = thumb
                    break
        else:
            logger.debug(f'not found: {keyword}')
            ret['ret'] = 'failed'
            ret['data'] = 'not found'
            return ret
        # the search page does not expose a release year
        # item.year = ''
        if manual == True:
            if image_mode == '3':
                image_mode = '0'
            item.image_url = SiteUtil.process_image_mode(image_mode, item.image_url, proxy_url=proxy_url)
        if do_trans:
            item.title_ko = SystemLogicTrans.trans(item.title, source='ja', target='ko')
        item.ui_code = f'FC2-{keyword}'
        # score calculation still needs work
        # most entries have no genre, so 90
        item.score = 90
        logger.debug('score :%s %s ', item.score, item.ui_code)
        ret['data'].append(item.as_dict())
        ret['data'] = sorted(ret['data'], key=lambda k: k['score'], reverse=True)
        ret['ret'] = 'success'
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
        ret['ret'] = 'exception'
        ret['data'] = str(exception)
    return ret
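# Illustrative snippet: the title cleanup in the 7mmtv search above strips the
# 'FC2 PPV' marker (in its common spellings) before translation. The title text is
# made up.
import re

title = '[FC2-PPV 1234567] sample upload title'
print(re.sub(r'(\[?FC2-?PPV-? ?\d{6,7}\]?)', '', title, flags=re.I).strip())
# sample upload title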
def javdb_search(keyword, do_trans=True, retry=0):
    try:
        from . import Vars
        keyword = keyword.strip().replace(' ', '-')
        #if content_type != 'censored':
        logger.debug('javdb keyword:%s', keyword)
        keyword = _javdb_prefer_keyword(keyword)
        logger.debug('javdb prefer keyword:%s', keyword)
        url = 'https://javdb.com/videos/search_autocomplete.json?q=%s' % keyword
        logger.debug('url : %s', url)
        page = _session.get(url, headers=_headers, proxies=Vars.proxies)
        #logger.debug(page.text)
        try:
            data = page.json()
        except ValueError:
            # happens when too many requests are sent in a short time
            if retry < 5:
                logger.debug('ValueError... wait:%s', retry)
                time.sleep(retry + 1)
                return javdb_search(keyword, do_trans=do_trans, retry=retry+1)
            else:
                logger.debug('ValueError Critical!!!')
                return
        ret = []
        score = 60
        find_correct = False
        for item in data:
            try:
                entity = {'meta': 'javdb'}
                entity['id'] = item['uid']
                entity['id_show'] = item['number']
                entity['title'] = item['title']
                tmp = entity['title'].replace('[%s]' % entity['id_show'], '').strip()
                entity['title_ko'] = SystemLogicTrans.trans(tmp) if do_trans else tmp
                entity['poster'] = item['cover_url']
                if entity['poster'].startswith('//'):
                    entity['poster'] = 'https:' + entity['poster']
                logger.debug('javdb search entity[id_show]:[%s] keyword:[%s]', entity['id_show'], keyword)
                if entity['id_show'] is None or keyword is None:
                    continue
                if entity['id_show'].upper().replace('-', ' ').replace('_', ' ') == keyword.upper().replace('-', ' ').replace('_', ' '):
                    entity['score'] = 100
                    find_correct = True
                else:
                    if find_correct:
                        break
                    entity['score'] = score
                    score += -5
                logger.debug('entity[score] : %s', entity['score'])
                ret.append(entity)
            except Exception as exception:
                logger.error('Exception:%s', exception)
                logger.error(traceback.format_exc())
        ret = sorted(ret, key=lambda k: k['score'], reverse=True)
        return ret
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
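# Illustrative snippet: the exact-match rule in javdb_search() above compares both
# sides upper-cased with dashes and underscores treated as spaces, so 'abp-123',
# 'ABP_123' and 'ABP 123' all count as a 100-point hit.
def _norm(code):
    return code.upper().replace('-', ' ').replace('_', ' ')

print(_norm('abp-123') == _norm('ABP_123'))   # True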
def trans(cls, text, do_trans=True, source='ja', target='ko'):
    if do_trans:
        return SystemLogicTrans.trans(text, source=source, target=target)
    return text