Python Text示例

编程语言: Python

命名空间/包名称: htmlparsing

类/类型: Text

hotexamples.com的示例: 16

Python Text - 已找到16个示例。这些是从开源项目中提取的、最受好评的 htmlparsing.Text 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

Text(16)

常用方法

Text (16)

示例#1

显示文件

文件： app.py 项目： 9233/toapi-douguo

class Cookbook(Item):
    """Field declarations for one recipe ("cookbook") entry.

    Each attribute binds an output field name to a selector object.

    NOTE(review): ``Item``, ``Text`` and ``Attr`` are defined outside this
    snippet. Presumably ``Text`` yields the matched element's text and
    ``Attr`` the value of the named attribute -- confirm against the
    library documentation.
    """
    title = Text('h2.title')
    # Long child-combinator chain down to the <img>; 'src' is the second
    # argument handed to Attr.
    img = Attr('.recipe-content > div > div > div > a > img', 'src')
    # Both counters sit under div.vcnum: the first selector accepts any
    # direct-child span, the second only a span with class "collectnum".
    browse_count = Text('div.vcnum > span')
    collect_count = Text('div.vcnum > span.collectnum')
    intro = Text('p.intro')
    tip = Text('div.tips > p')

示例#2

显示文件

文件： app.py 项目： 9233/toapi-douguo

class Repic(Item):
    """Field declarations for a recipe card, plus a cleaner for ``img``.

    ``img`` is read from the ``style`` attribute of ``a.cook-img`` rather
    than from an ``<img>`` tag, so ``clean_img`` pulls the URL out of the
    inline CSS.

    NOTE(review): ``Item``, ``Text`` and ``Attr`` are defined outside this
    snippet; how and when ``clean_img`` is invoked is up to that library.
    """
    img = Attr('a.cook-img', 'style')
    url = Attr('a.cook-img', 'href')
    title = Text('div.cook-info > a.cookname')
    major = Text('div.cook-info > p.major')

    def clean_img(self, img):
        """Extract the image URL from an inline ``style`` value.

        Args:
            img: The raw ``style`` string, expected to look like
                ``background: url(<URL>) no-repeat center center;...``.

        Returns:
            The text captured inside ``url(...)`` when ``img`` starts with
            the expected declaration. When ``img`` is empty/``None`` or
            has a different shape, ``img`` is returned unchanged instead
            of raising.
        """
        re_img = re.compile(
            r'background: url[(](.*)[)] no-repeat center center;background-size: cover;position: relative;'
        )
        # re.match returns None on a mismatch; guard before reading groups
        # so an unexpected style string does not raise AttributeError.
        found = re_img.match(img) if img else None
        if found is None:
            return img
        return found.group(1)

示例#3

显示文件

class CourseSeats(Item):
    """Field declarations and cleaners for a course's seat availability.

    ``total_seats`` and ``open_seats`` share one selector object; their
    cleaners split the same text on whitespace and read different tokens
    (index 2 for the total, index 0 for the open count). The split result
    is cached in ``_tmp_seats`` so it is computed only once.

    NOTE(review): the cache is stored on ``self`` and never reset here;
    whether it can go stale depends on how the framework supplies
    ``self`` -- confirm.
    """
    class_num = Text(".classNbrColumnValue > .course-details-link")
    total_seats = Text(".availableSeatsColumnValue")
    open_seats = total_seats
    _tmp_seats = None

    def clean_class_num(self, value):
        """Return ``value`` without leading/trailing whitespace."""
        trimmed = value.strip()
        return trimmed

    def clean_total_seats(self, value):
        """Return the third whitespace-separated token as an ``int``."""
        tokens = self._tmp_seats
        if not tokens:
            # First cleaner to run populates the shared cache.
            tokens = self._tmp_seats = value.split()
        return int(tokens[2])

    def clean_open_seats(self, value):
        """Return the first whitespace-separated token as an ``int``."""
        tokens = self._tmp_seats
        if not tokens:
            # First cleaner to run populates the shared cache.
            tokens = self._tmp_seats = value.split()
        return int(tokens[0])

示例#4

显示文件

class ClassSeats(Item):
    """Field declarations and cleaners for seat counts in a side panel.

    Both fields share the ``#details-side-panel > span`` selector. The
    matched text is split on whitespace once and cached in ``_tmp_seats``;
    token 4 is read as the total and token 2 as the open count.

    NOTE(review): the cache is stored on ``self`` and never reset here;
    whether it can go stale depends on how the framework supplies
    ``self`` -- confirm.
    """
    total_seats = Text("#details-side-panel > span")
    open_seats = total_seats
    _tmp_seats = None

    def clean_total_seats(self, value):
        """Return the fifth whitespace-separated token as an ``int``."""
        tokens = self._tmp_seats
        if not tokens:
            # First cleaner to run populates the shared cache.
            tokens = self._tmp_seats = value.split()
        return int(tokens[4])

    def clean_open_seats(self, value):
        """Return the third whitespace-separated token as an ``int``."""
        tokens = self._tmp_seats
        if not tokens:
            # First cleaner to run populates the shared cache.
            tokens = self._tmp_seats = value.split()
        return int(tokens[2])

示例#5

显示文件

class Class(Item):
    """Field declarations for a class page whose ``h2`` holds three fields.

    ``department``, ``course`` and ``title`` all share the ``Text("h2")``
    selector. The cleaners split that heading on whitespace once, cache
    the tokens in ``_tmp_course`` and each pick their part:

    * token 0        -> department
    * token 1        -> course
    * tokens 3 ...   -> title (joined with single spaces)

    Token 2 is skipped; presumably it is a separator such as a dash --
    confirm against a real page.

    NOTE(review): the cache is stored on ``self`` and never reset here;
    whether it can go stale depends on how the framework supplies
    ``self`` -- confirm.
    """
    department = Text("h2")
    course = department
    title = department
    # Disabled field declarations, kept for reference:
    # school = Text(".row > .col-md-7 > span > a")
    # instructor = Attr(".nametip", "title")
    _tmp_course = None

    def clean_department(self, value):
        """Return the first whitespace-separated token of the heading."""
        if not self._tmp_course:
            # No debug output here: the cleaner should be silent, like its
            # two siblings below.
            self._tmp_course = value.split()
        return self._tmp_course[0]

    def clean_course(self, value):
        """Return the second whitespace-separated token of the heading."""
        if not self._tmp_course:
            self._tmp_course = value.split()
        return self._tmp_course[1]

    def clean_title(self, value):
        """Return tokens from index 3 onward, joined by single spaces."""
        if not self._tmp_course:
            self._tmp_course = value.split()
        return ' '.join(self._tmp_course[3:])

示例#6

显示文件

import requests
from htmlparsing import Element, HTMLParsing, Text, Attr, Parse

# Fetch a locally served page and extract a single record from it.
url = 'http://localhost:8082/home/serveList.html'
# requests has no default timeout; bound the call so an unresponsive
# server cannot block the script forever.
r = requests.get(url, timeout=10)
# Map each output key to the selector that supplies its value.
article_detail = HTMLParsing(r.text).detail({
    'title': Text('a.storylink'),
    'points': Parse('span.score', '>{} points'),
    'link': Attr('a.storylink', 'href'),
})
print(article_detail)

示例#7

显示文件

文件： blog_api.py 项目： rheehot/learning-code

class Post(Item):
    """Field declarations for a blog post entry.

    NOTE(review): ``Item``, ``Text`` and ``Attr`` are defined outside this
    snippet; presumably ``Attr`` reads the named attribute and ``Text``
    the element text -- confirm against the library documentation.
    """
    # 'href' of the element with class "read-more".
    url = Attr('.read-more', 'href')
    # Anchor that is a direct child of an <h1>.
    title = Text('h1 > a')

示例#8

显示文件

 class Post(Item):
     """Field declarations for a post; ``url`` names an odd attribute.

     NOTE(review): 'no this attribute' is passed where an attribute name
     is expected. Presumably this deliberately names an attribute that
     does not exist, to exercise the missing-attribute path -- confirm.
     """
     url = Attr('.storylink', 'no this attribute')
     title = Text('.storylink')

示例#9

显示文件

 class Post(Item):
     """Field declarations for a post, both taken from ``.storylink``.

     NOTE(review): ``Item``, ``Text`` and ``Attr`` are defined outside
     this snippet; presumably ``Attr`` reads the named attribute and
     ``Text`` the element text -- confirm.
     """
     # Same selector for both fields: one supplies 'href', the other text.
     url = Attr('.storylink', 'href')
     title = Text('.storylink')

示例#10

显示文件

class Post(Item):
    """Field declarations for a post, both taken from ``.storylink``.

    NOTE(review): ``Item``, ``Text`` and ``Attr`` are defined outside this
    snippet; presumably ``Attr`` reads the named attribute and ``Text``
    the element text -- confirm.
    """
    # Same selector for both fields: one supplies "href", the other text.
    url = Attr(".storylink", "href")
    title = Text(".storylink")

示例#11

显示文件

 class Post(Item):
     """Field declarations for a post; ``url`` names an odd attribute.

     NOTE(review): "no this attribute" is passed where an attribute name
     is expected. Presumably this deliberately names an attribute that
     does not exist, to exercise the missing-attribute path -- confirm.
     """
     url = Attr(".storylink", "no this attribute")
     title = Text(".storylink")

示例#12

显示文件

class Course(Item):
    """Field declarations for a course entry.

    NOTE(review): the selectors are bare tag names ('a', 'h4'), so they
    are presumably evaluated relative to a per-course container chosen
    elsewhere -- confirm against the caller.
    """
    url = Attr('a', 'href')
    title = Text('h4')

示例#13

显示文件

文件： app.py 项目： 9233/toapi-douguo

class Ingredients(Item):
    """Field declarations for one ingredient row.

    NOTE(review): 'span' also matches a span with class "right", so the
    two fields are only distinct if the library takes the first match --
    confirm.
    """
    ingredient = Text('span')
    weight = Text('span.right')

示例#14

显示文件

文件： app.py 项目： 9233/toapi-douguo

class Step(Item):
    """Field declarations for one preparation step.

    NOTE(review): the selectors are bare tag names ('img', 'p'), so they
    are presumably evaluated relative to a per-step container chosen
    elsewhere -- confirm against the caller.
    """
    img = Attr('img', 'src')
    step = Text('p')

示例#15

显示文件

文件： app.py 项目： 9233/toapi-douguo

class Selected(Item):
    """Field declarations for a "selected" entry.

    ``title`` and ``url`` use the same ``.name`` selector; ``img`` comes
    from an <img> that is a direct child of an anchor.

    NOTE(review): ``Item``, ``Text`` and ``Attr`` are defined outside this
    snippet; presumably ``Attr`` reads the named attribute and ``Text``
    the element text -- confirm.
    """
    title = Text('.name')
    img = Attr('a > img', 'src')
    url = Attr('.name', 'href')

示例#16

显示文件

"""<Result ('Skip to content',) {}>"""

# Get content or html
# `e` is created before this excerpt. Each print below reads a different
# attribute (.text, .html, .markdown) of the sixth result of the '//a'
# XPath query. The bare string after each print is a no-op expression;
# it appears to record the expected output -- confirm against the full
# example.
print(e.xpath('//a')[5].text)
"""PyPI"""

print(e.xpath('//a')[5].html)
"""<a href="https://pypi.python.org/" title="Python Package Index">PyPI</a>"""

print(e.xpath('//a')[5].markdown)
"""[PyPI](https://pypi.python.org/ "Python Package Index")"""

# Fetch the front page and extract one record per '.athing' match.
url = 'https://news.ycombinator.com/'
# requests has no default timeout; bound the call so an unresponsive
# server cannot block the script forever.
r = requests.get(url, timeout=10)
# The first argument selects the repeated elements; the dict maps each
# output key to the selector that supplies its value.
article_list = HTMLParsing(r.text).list('.athing', {
    'title': Text('a.storylink'),
    'link': Attr('a.storylink', 'href'),
})
print(article_list)

# Fetch a single item page and extract one record from it.
url = 'https://news.ycombinator.com/item?id=16476454'
# requests has no default timeout; bound the call so an unresponsive
# server cannot block the script forever.
r = requests.get(url, timeout=10)
# Map each output key to the selector that supplies its value.
article_detail = HTMLParsing(r.text).detail({
    'title': Text('a.storylink'),
    'points': Parse('span.score', '>{} points'),
    'link': Attr('a.storylink', 'href'),
})
print(article_detail)