def main(number: str):
    """Scrape video metadata for *number* from the m45e javlib mirror.

    Returns a pretty-printed JSON string with the scraped fields, or an
    empty JSON object when the cookie handshake failed or the search did
    not land on a video detail page.
    """
    raw_cookies, user_agent = get_javlib_cookie()
    # Blank cookies mean the javlib site returned an error — bail out early.
    if not raw_cookies:
        return json.dumps({}, ensure_ascii=False, sort_keys=True,
                          indent=4, separators=(',', ':'))

    # Turn the raw Set-Cookie text into a plain name -> value mapping.
    jar = SimpleCookie()
    jar.load(raw_cookies)
    cookies = {name: morsel.value for name, morsel in jar.items()}

    # Search by ID; a direct hit redirects to a "/?v=jav..." detail URL.
    result = get_html(
        "http://www.m45e.com/cn/vl_searchbyid.php?keyword={}".format(number),
        cookies=cookies,
        ua=user_agent,
        return_type="object")
    soup = BeautifulSoup(result.text, "html.parser")
    lx = html.fromstring(str(soup))

    if "/?v=jav" not in result.url:
        # Not a detail page (no match or ambiguous search) — nothing to report.
        dic = {}
    else:
        dic = {
            "title": get_title(lx, soup),
            "studio": get_table_el_single_anchor(soup, "video_maker"),
            "year": get_table_el_td(soup, "video_date")[:4],
            "outline": "",
            "director": get_table_el_single_anchor(soup, "video_director"),
            "cover": get_cover(lx),
            "imagecut": 1,
            "actor_photo": "",
            "website": result.url,
            "source": "javlib.py",
            "actor": get_table_el_multi_anchor(soup, "video_cast"),
            "label": get_table_el_td(soup, "video_label"),
            "tag": get_table_el_multi_anchor(soup, "video_genres"),
            "number": get_table_el_td(soup, "video_id"),
            "release": get_table_el_td(soup, "video_date"),
            "runtime": get_from_xpath(
                lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
            "series": '',
        }

    return json.dumps(dic, ensure_ascii=False, sort_keys=True,
                      indent=4, separators=(',', ':'))
def _video_page_data(lx, soup, url, number):
    """Assemble the metadata dict from a parsed javlibrary video detail page.

    *lx* is the lxml tree and *soup* the BeautifulSoup tree of the same
    page; *url* is the page URL recorded as "website"; *number* is the
    queried ID, passed through to get_outline.
    """
    return {
        "title": get_title(lx, soup),
        "studio": get_table_el_single_anchor(soup, "video_maker"),
        "year": get_table_el_td(soup, "video_date")[:4],
        "outline": get_outline(number),
        "director": get_table_el_single_anchor(soup, "video_director"),
        "cover": get_cover(lx),
        "imagecut": 1,
        "actor_photo": "",
        "website": url,
        "source": "javlib.py",
        "actor": get_table_el_multi_anchor(soup, "video_cast"),
        "label": get_table_el_td(soup, "video_label"),
        "tag": get_table_el_multi_anchor(soup, "video_genres"),
        "number": get_table_el_td(soup, "video_id"),
        "release": get_table_el_td(soup, "video_date"),
        "runtime": get_from_xpath(
            lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
        "series": '',
    }


def main(number: str):
    """Scrape video metadata for *number* from javlibrary.com.

    Searches by ID; if the search redirects straight to a detail page
    ("/?v=jav" in the URL) that page is parsed. Otherwise, if *number*
    appears among the search-result IDs, the matching result link is
    followed and parsed. Returns a pretty-printed JSON string; the
    no-match fallback is ``{"title": ""}``.
    """
    raw_cookies, user_agent = get_javlib_cookie()
    # Blank cookies mean the javlib site returned an error — bail out early.
    if not raw_cookies:
        return json.dumps({}, ensure_ascii=False, sort_keys=True,
                          indent=4, separators=(',', ':'))

    # Turn the raw Set-Cookie text into a plain name -> value mapping.
    s_cookie = SimpleCookie()
    s_cookie.load(raw_cookies)
    cookies = {key: morsel.value for key, morsel in s_cookie.items()}

    result = get_html(
        "http://www.javlibrary.com/cn/vl_searchbyid.php?keyword={}".format(number),
        cookies=cookies,
        ua=user_agent,
        return_type="object"
    )
    soup = BeautifulSoup(result.text, "html.parser")
    lx = html.fromstring(str(soup))

    # IDs listed on a multi-result search page.
    fanhao_pather = re.compile(r'<a href=".*?".*?><div class="id">(.*?)</div>')
    fanhao = fanhao_pather.findall(result.text)

    if "/?v=jav" in result.url:
        # Direct hit: the search redirected to the detail page itself.
        dic = _video_page_data(lx, soup, result.url, number)
    elif number.upper() in fanhao:
        # Search-result page: map each result ID to its absolute URL,
        # then fetch and parse the page for the exact ID match.
        url_pather = re.compile(r'<a href="(.*?)".*?><div class="id">(.*?)</div>')
        id_to_url = {}
        for href, vid in url_pather.findall(result.text):
            # hrefs are relative like "./?v=jav..." — strip the leading dot.
            id_to_url[vid] = 'http://www.javlibrary.com/cn/' + href.lstrip('.')
        av_url = id_to_url[number.upper()]
        result = get_html(
            av_url,
            cookies=cookies,
            ua=user_agent,
            return_type="object"
        )
        soup = BeautifulSoup(result.text, "html.parser")
        lx = html.fromstring(str(soup))
        dic = _video_page_data(lx, soup, result.url, number)
    else:
        # No match at all.
        dic = {"title": ""}

    return json.dumps(dic, ensure_ascii=False, sort_keys=True,
                      indent=4, separators=(',', ':'))