def __init__(self, manga_element: etree.ElementBase, release_interval: timedelta):
    """
    Populate the instance from a single simulpub card element.

    Reads the chapter badge, author, link and partner sources from
    ``manga_element`` with CSS selectors and stores the results on
    ``self`` (``chapter_decimal``, ``author``, ``sources``,
    ``release_date``).

    NOTE(review): ``release_interval`` and the locals ``title``,
    ``latest_chapter`` and ``title_id`` are not read again in the visible
    part of this method; they are still computed because the lookups can
    raise on malformed markup.
    """
    select = manga_element.cssselect

    title = select('cite')[0].text
    self.chapter_decimal: Optional[int] = None

    badge_text = select('.simulpub-card__badge span')[0].text
    # Only consult the special-chapter pattern when the badge mentions "ex".
    special = self.SPECIAL_RE.match(badge_text) if 'ex' in badge_text.lower() else None

    if special:
        parts = special.groups()
        self.chapter_decimal = 5
    else:
        parts = badge_text.split('.')

    # A special chapter has its latest chapter set to -1.
    latest_chapter = -1 if 'ex' in parts[0].lower() else int(parts[0])

    if len(parts) > 1:
        self.chapter_decimal = int(parts[1])

    self.author = select('.proper-noun')[0].text

    # The title id is the last path segment of the card link.
    href = select('.card__link')[0].attrib['href']
    title_id = href.strip('/').split('/')[-1]

    partner_links = select('.simulpub-card__partners li a')
    self.sources = [Source(link, self) for link in partner_links]
    self.release_date = datetime.utcnow()
def _cssselect(cls, parser: ElementBase, selector) -> List[ElementBase]:
    """
    Run ``selector`` against ``parser``.

    When ``selector`` is ``None`` no query is made and ``parser`` itself is
    returned wrapped in a one-element list.
    """
    return parser.cssselect(selector) if selector is not None else [parser]
def __init__(self, chapter_element: etree.ElementBase, group_id: Optional[int] = None):
    """
    Build a chapter from one comiXology listing element.

    The element is marked ``self.invalid = True`` and the base initializer
    is skipped when the subtitle starts with "vol", when the action button
    reads "pre-order", or when no title id can be extracted.

    Args:
        chapter_element: Element holding the subtitle, action button,
            details link and expand-menu data of one listing entry.
        group_id: Forwarded unchanged to the base initializer.
    """
    title = chapter_element.cssselect(
        '.content-info .content-subtitle')[0].text or ''
    title = title.strip()

    if title.lower().startswith('vol'):
        self.invalid = True
        return

    # ``.text`` is None for an element without text, so guard it the same
    # way the other ``.text`` reads in this method are guarded.
    add_to_cart = chapter_element.cssselect(
        '.action-button span.action-title')[0].text or ''
    if add_to_cart.lower() == 'pre-order':
        self.invalid = True
        return

    self.invalid = False
    manga_title = chapter_element.cssselect(
        '.content-title.cu-alc')[0].text

    # "<anything>#12.5" -> ['12', '5']
    ch = title.split('#')[-1].split('.')
    if not title:
        # No subtitle: fall back to the main title and try to read the
        # chapter number from it.
        title = chapter_element.cssselect(
            '.content-info .content-title')[0].text or ''
        match = extra_regex.match(title)
        if match:
            ch = match.groups()
        elif not title.lower().endswith('extra'):
            logger.warning(
                f'Empty title for {manga_title} actual title {title}. Might be an extra issue'
            )

        title = title.split(':')[-1] if ':' in title else 'Extra'

    # A regex group that did not participate is None; re.match would raise
    # TypeError on it, while the code below already treats a missing
    # number as 0.
    special_match = None
    if ch[0] is not None:
        special_match = extra_chapter_regex.match(ch[0])
    if special_match:
        ch = special_match.groups()

    try:
        chapter_number = int(ch[0] or 0)
    except ValueError:
        chapter_number = 0

    chapter_decimal = None
    if len(ch) > 1 and ch[1] is not None:
        chapter_decimal = int(ch[1])

    # The details link is both the chapter url and, through its last path
    # segment, the chapter identifier.
    self.url = chapter_element.cssselect(
        'a.content-details')[0].attrib['href']
    chapter_identifier = self.url.split('/')[-1]

    title_id = chapter_element.cssselect(
        '.action-button.expand-action')[0].attrib.get(
            'data-expand-menu-data', '')
    found = title_regex.findall(title_id)
    if not found:
        # Not all titles have title id set (probably only applies to newer titles).
        # If not, mark as invalid and skip instead of raising.
        self.invalid = True
        logger.debug(f'Title id not found for {self.url}')
        return

    if len(found) > 1:
        logger.warning(f'Multiple title ids found for {self.url}')

    title_id = found[0]
    self.release_date_maybe: Optional[datetime] = None
    self._created_at = datetime.utcnow()

    super().__init__(chapter_title=title,
                     chapter_number=chapter_number,
                     chapter_identifier=chapter_identifier,
                     title_id=title_id,
                     volume=None,
                     decimal=chapter_decimal,
                     release_date=None,
                     manga_title=manga_title,
                     manga_url=None,
                     group=ComiXology.NAME,
                     group_id=group_id)
def get_chapter_elements(
        root: etree.ElementBase) -> List[etree.ElementBase]:
    """Return a new list of every ``li.content-item`` element selected from ``root``."""
    matches = root.cssselect('li.content-item')
    return [*matches]