def __get_brief_by_card(card):
    """Build a ``Brief`` from a single result card.

    The card's ``.column`` children are read by position:

    * index 1 - text is used as the release date
    * index 2 - the text of every ``<span>`` is joined into the actress list
    * index 3 - text is the title; the first ``rel`` value of its ``<a>``
      is used as the preview image URL
    * index 4 - its ``.next`` node, stripped, is the code

    :param card: a parsed tag exposing ``select`` (a BeautifulSoup tag,
        judging by the calls made on it).
    :return: the populated ``Brief``.
    :raises IndexError: if the card has fewer than five ``.column`` children.
    """
    cells = card.select(".column")

    code = cells[4].next.strip()

    actress_names = [span.text.strip() for span in cells[2].find_all(name="span")]
    actress = ", ".join(actress_names)

    title_cell = cells[3]
    title = title_cell.text.strip()

    # The preview link is optional, so the lookup goes through try_evaluate
    # instead of being allowed to raise.
    img, _error = try_evaluate(lambda: title_cell.a.attrs["rel"][0])

    release_date = cells[1].text.strip()

    brief = Brief()
    brief.title = title
    brief.preview_img_url = img
    brief.code = code
    brief.actress = actress
    brief.set_release_date(release_date)
    return brief
def get_brief(cls, code):
    """Fetch the detail page for *code* and return it as a ``Brief``.

    Two requests are made through ``cls.__client``: a search by id, then
    the page named by the ``og:url`` meta tag found in the search response.

    :param code: identifier appended verbatim to the search URL; it is also
        stored unchanged as ``brief.code``.
    :return: a populated ``Brief``, or ``None`` when the search response
        contains no ``og:url`` match.
    :raises IndexError: if the detail page lacks one of the selected
        elements (``.post-title``, ``#video_jacket_img``, ``#video_date``
        or a ``<td>`` inside it).
    """
    search_url = "http://www.javlibrary.com/ja/vl_searchbyid.php?keyword=" + code
    search_html = cls.__client.get(search_url).text

    og_url = re.search(r"\"og:url\" content=\"//(.+?)\">", search_html)
    if og_url is None:
        return None

    # The captured URL has no scheme (the pattern skips the leading "//").
    detail_html = cls.__client.get("http://" + og_url.group(1)).text

    brief = Brief()
    soup = bs4.BeautifulSoup(detail_html, "lxml")

    brief.title = soup.select(".post-title")[0].text

    jacket_src = soup.select("#video_jacket_img")[0].attrs['src']
    if not jacket_src.startswith("http"):
        # Scheme-less src: prepend one so the URL is directly usable.
        jacket_src = "http:" + jacket_src
    brief.preview_img_url = jacket_src

    brief.code = code

    date_cells = soup.select("#video_date")[0].select("td")
    brief.set_release_date(date_cells[-1].text)

    cast_names = [span.text for span in soup.select(".cast")]
    brief.actress = ", ".join(cast_names)
    return brief
def get_brief_from_a_card(card_tag):
    """Build a ``Brief`` from one card tag.

    Extracted fields:

    * release date - the first ``YYYY-MM-DD`` substring of the card text,
      parsed into a ``datetime``; parsing goes through ``try_evaluate`` so a
      card without a date does not raise here
    * actress - the text of every ``<a class="btn-danger">``, comma-joined
    * preview image - the ``src`` of the first ``<img>``
    * title - the stripped text of the ``<h5>``, falling back to ``""``
    * code - the stripped text of the ``<h4>``

    :param card_tag: a parsed tag exposing ``find`` / ``find_all`` / ``text``
        (a BeautifulSoup tag, judging by the calls made on it).
    :return: the populated ``Brief``.
    :raises AttributeError: if the card has no ``<h4>``; the code lookup is
        the only field not wrapped in ``try_evaluate``.
    """
    release_date, _ = try_evaluate(lambda: datetime.datetime.strptime(
        re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0), "%Y-%m-%d"))

    actress = [
        link.text
        for link in card_tag.find_all(name='a', attrs={'class': 'btn-danger'})
    ]

    img, _ = try_evaluate(lambda: card_tag.find(name='img').attrs['src'])
    # Only add a scheme when one is missing. Two fixes over the old check:
    #  * "https:" URLs are left alone instead of becoming "http:https://...".
    #  * a failed lookup (presumably a None fallback from try_evaluate --
    #    TODO confirm) no longer crashes on ``.startswith``.
    if img and not img.startswith(("http:", "https:")):
        img = "http:" + img

    brief = Brief()
    brief.preview_img_url = img
    brief.title, _ = try_evaluate(
        lambda: card_tag.find(name='h5').text.strip(), "")
    brief.actress = ", ".join(actress)
    brief.set_release_date(release_date)
    brief.code = card_tag.find(name='h4').text.strip()
    return brief
def __get_brief_by_box(box):
    """Build a ``Brief`` from a single result box.

    Fields are read from these descendants of *box*:

    * ``span.video_id`` - the code
    * ``div.col-sm-7`` - detail container holding the title
      (``span.video_title``), the actress cells (``div.col-xs-6``) and the
      optional preview button (``span.preview_btn``, whose ``rel`` attribute
      is used as the preview image URL)
    * ``div.col-sm-2`` - the text of its ``<span>`` is the release date

    Title, code and the joined actress string are stripped of surrounding
    whitespace before being stored.

    :param box: a parsed tag exposing ``find`` (a BeautifulSoup tag, judging
        by the calls made on it).
    :return: the populated ``Brief``.
    :raises AttributeError: if any of the required elements is missing.
    """
    raw_code = box.find(name='span', attrs={'class': 'video_id'}).text
    detail = box.find(name='div', attrs={'class': 'col-sm-7'})

    cast_cells = detail.find_all(name='div', attrs={'class': 'col-xs-6'})
    joined_cast = ", ".join([cell.text for cell in cast_cells])

    raw_title = detail.find(name='span', attrs={'class': 'video_title'}).text

    def _preview_rel():
        # Raises when there is no preview button; try_evaluate absorbs that.
        button = detail.find(name='span', attrs={'class': 'preview_btn'})
        return button.attrs['rel']

    img, _error = try_evaluate(_preview_rel)

    date_cell = box.find(name='div', attrs={'class': 'col-sm-2'})
    release_date = date_cell.span.text

    brief = Brief()
    brief.title = raw_title.strip()
    brief.preview_img_url = img
    brief.code = raw_code.strip()
    brief.actress = joined_cast.strip()
    brief.set_release_date(release_date)
    return brief