Python Parser.IncorrectFormat示例

编程语言: Python

命名空间/包名称: parsers.parser

类/类型: Parser

方法/功能: IncorrectFormat

hotexamples.com的示例: 2

Python Parser.IncorrectFormat - 已找到2个示例。这些是从开源项目中提取的最受好评的parsers.parser.Parser.IncorrectFormat现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

Parser(14)

parse(6)

IncorrectFormat(2)

P(1)

__subclasses__(1)

parse_logs(1)

parse_metadata(1)

run(1)

示例#1

显示文件

文件： html_parser.py 项目： mgorshkov/data_gathering

    def parse(self, data):
        soup = BeautifulSoup(data, 'html.parser')
        obj = soup.find('span', {'class': "header-profile-login"})
        if not obj:
            raise Parser.IncorrectFormat(data)

        name = obj.text.strip()

        object_list = soup.find_all('a')
        if not object_list:
            raise Parser.IncorrectFormat(data)

        num_books = 0
        for obj in object_list:
            m = re.fullmatch(r"Книги\s*(\d+)\s*", obj.text)
            if m:
                num_books = int(m.group(1))
                break

        object_list = soup.find_all('div', {'class': "group-row-title"})
        if not object_list:
            raise Parser.IncorrectFormat(data)

        birth = None
        death = None
        for obj in object_list:
            if not birth:
                m = re.fullmatch(r"(?:Родился|Родилась):\s*(.*)", obj.text)
                if m:
                    birth = m.group(1)
            if not death:
                m = re.fullmatch(r"(?:Умер|Умерла):\s*(.*)", obj.text)
                if m:
                    death = m.group(1)
            if birth and death:
                break

        birth_date, birth_place = self.parseDate(birth)
        birth_place = re.sub(r'\s+', ' ', birth_place).strip()
        death_date, death_place = self.parseDate(death)
        death_place = re.sub(r'\s+', ' ', death_place).strip()

        obj = soup.find('span', {
                'class': "stats-item marg-right",
                'title': 'Почитатели творчества'
            })

        adepts = int(obj.text if obj is not None else 0)

        obj = soup.find('span', {
                'class': "stats-item marg-right",
                'title': 'Читателей'
            })

        readers = int(obj.text if obj is not None else 0)

        return [
            name, birth_date, birth_place, death_date,
            death_place, num_books, adepts, readers]

示例#2

显示文件

文件： html_parser.py 项目： mgorshkov/data_gathering

    def parse(self, data):
        """
        Parses html text and extracts field values
        :param data: html text (page)
        :return: a list of urls with author data
        plus continuation flag
        """
        soup = BeautifulSoup(data, 'html.parser')

        # extract href from
        # <a class=\"arow-name c-black\" href=\"\/author\/30230\">...</a>
        object_list = soup.find_all('a', {'class': 'arow-name c-black'})
        if not object_list:
            raise Parser.IncorrectFormat(data)

        return [x.get('href') for x in object_list]