Exemplos de ElementBase.cssselect em Python, exemplos de lxml.etree.ElementBase.cssselect em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: kodansha.py Projeto: s0hv/manga-tracker

    def __init__(self, manga_element: etree.ElementBase,
                 release_interval: timedelta):
        """Read chapter, author and source data out of a manga card element.

        Args:
            manga_element: Element queried with CSS selectors for the title
                (``cite``), the chapter badge, the author, the card link and
                the partner links.
            release_interval: Not read anywhere in the visible part of this
                constructor.

        Note:
            ``title``, ``latest_chapter`` and ``title_id`` are computed here
            but are not consumed within the visible span of this method.
        """
        title = manga_element.cssselect('cite')[0].text
        self.chapter_decimal: Optional[int] = None

        badge_text = manga_element.cssselect(
            '.simulpub-card__badge span')[0].text

        # SPECIAL_RE is only consulted when the badge text mentions "ex".
        if 'ex' in badge_text.lower():
            special_match = self.SPECIAL_RE.match(badge_text)
        else:
            special_match = None

        if special_match:
            chapter_parts = special_match.groups()
            # Decimal assigned to special chapters; it is overwritten below
            # whenever a second part is present.
            self.chapter_decimal = 5
        else:
            chapter_parts = badge_text.split('.')

        # A special chapter gets -1 as its latest chapter number.
        first_part = chapter_parts[0]
        latest_chapter = -1 if 'ex' in first_part.lower() else int(first_part)

        if len(chapter_parts) > 1:
            self.chapter_decimal = int(chapter_parts[1])

        self.author = manga_element.cssselect('.proper-noun')[0].text

        # The title id is the last path segment of the card link's href.
        card_href = manga_element.cssselect('.card__link')[0].attrib['href']
        title_id = card_href.strip('/').split('/')[-1]

        partner_links = manga_element.cssselect(
            '.simulpub-card__partners li a')
        collected_sources = []
        for link in partner_links:
            collected_sources.append(Source(link, self))
        self.sources = collected_sources

        self.release_date = datetime.utcnow()

Exemplo n.º 2

0

Exibir arquivo

Arquivo: html.py Projeto: adrianrocamora/manga-dl

 def _cssselect(cls, parser: ElementBase, selector) -> List[ElementBase]:
     if selector is None:
         return [parser]
     return parser.cssselect(selector)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: comixology.py Projeto: s0hv/manga-tracker

    def __init__(self,
                 chapter_element: etree.ElementBase,
                 group_id: Optional[int] = None):
        """Build a chapter from a comiXology listing element.

        Sets ``self.invalid`` to ``True`` and returns early, without calling
        the parent initializer, when the subtitle starts with "vol", when the
        action button reads "pre-order", or when no title id can be extracted
        from the expand-menu data.

        Args:
            chapter_element: Element queried via CSS selectors for the
                subtitle, the titles, the action button and the details link.
            group_id: Passed through unchanged to the parent initializer.
        """
        title = chapter_element.cssselect(
            '.content-info .content-subtitle')[0].text or ''
        title = title.strip()

        if title.lower().startswith('vol'):
            self.invalid = True
            return

        add_to_cart = chapter_element.cssselect(
            '.action-button span.action-title')[0].text
        if add_to_cart.lower() == 'pre-order':
            self.invalid = True
            return

        self.invalid = False

        manga_title = chapter_element.cssselect(
            '.content-title.cu-alc')[0].text

        # Text after the last '#' split on '.', giving [number, decimal?].
        ch = title.split('#')[-1].split('.')
        if not title:
            # No subtitle: fall back to the content title.
            title = chapter_element.cssselect(
                '.content-info .content-title')[0].text or ''
            match = extra_regex.match(title)
            if match:
                ch = match.groups()
            elif not title.lower().endswith('extra'):
                logger.warning(
                    f'Empty title for {manga_title} actual title {title}. Might be an extra issue'
                )
            title = title.split(':')[-1] if ':' in title else 'Extra'

        # A regex group that did not participate in the match is None, and
        # re matching raises TypeError on None, so only probe real strings.
        special_match = None
        if ch[0] is not None:
            special_match = extra_chapter_regex.match(ch[0])
        if special_match:
            ch = special_match.groups()

        try:
            chapter_number = int(ch[0] or 0)
        except ValueError:
            chapter_number = 0
        chapter_decimal = None
        if len(ch) > 1 and ch[1] is not None:
            chapter_decimal = int(ch[1])

        # Query the details link once; its href is both the chapter URL and
        # the source of the chapter identifier (its last path segment).
        details_href = chapter_element.cssselect(
            'a.content-details')[0].attrib['href']
        self.url = details_href
        chapter_identifier = details_href.split('/')[-1]

        title_id = chapter_element.cssselect(
            '.action-button.expand-action')[0].attrib.get(
                'data-expand-menu-data', '')
        found = title_regex.findall(title_id)
        if not found:
            # Not all titles have title id set (probably only applies to newer titles).
            # If not mark as invalid and skip
            self.invalid = True
            logger.debug(f'Title id not found for {self.url}')
            return

        if len(found) > 1:
            logger.warning(f'Multiple title ids found for {self.url}')

        title_id = found[0]
        self.release_date_maybe: Optional[datetime] = None
        self._created_at = datetime.utcnow()

        super().__init__(chapter_title=title,
                         chapter_number=chapter_number,
                         chapter_identifier=chapter_identifier,
                         title_id=title_id,
                         volume=None,
                         decimal=chapter_decimal,
                         release_date=None,
                         manga_title=manga_title,
                         manga_url=None,
                         group=ComiXology.NAME,
                         group_id=group_id)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: comixology.py Projeto: s0hv/manga-tracker

 def get_chapter_elements(
         root: etree.ElementBase) -> List[etree.ElementBase]:
     """Return every ``li.content-item`` element selected from *root* as a list."""
     chapter_items = root.cssselect('li.content-item')
     return list(chapter_items)