Пример #1
0
class Cookbook(Item):
    """Field declarations for a cookbook entry.

    Every attribute pairs a field name with a CSS selector. ``Attr`` takes a
    second argument naming the HTML attribute to read. The extraction itself
    is performed by the ``Item`` base class, which is defined outside this
    snippet.
    """

    title = Text('h2.title')
    # Reads the ``src`` attribute of the <img> nested under .recipe-content.
    img = Attr('.recipe-content > div > div > div > a > img', 'src')
    # NOTE(review): this selector also matches ``span.collectnum`` (used by
    # collect_count below); presumably the first matching span is used --
    # confirm against the page markup.
    browse_count = Text('div.vcnum > span')
    collect_count = Text('div.vcnum > span.collectnum')
    intro = Text('p.intro')
    tip = Text('div.tips > p')
Пример #2
0
class Repic(Item):
    """Field declarations for a recipe card, plus a cleaner for ``img``.

    ``img`` is read from the inline ``style`` attribute of ``a.cook-img``;
    ``clean_img`` pulls the URL out of that style string.
    """

    img = Attr('a.cook-img', 'style')
    url = Attr('a.cook-img', 'href')
    title = Text('div.cook-info > a.cookname')
    major = Text('div.cook-info > p.major')

    def clean_img(self, img):
        """Return the URL found inside ``background: url(...)`` in *img*.

        The style string must start with the exact expected declaration;
        otherwise no match is produced and an ``AttributeError`` is raised
        when the (missing) match object is accessed.
        """
        style_pattern = re.compile(
            'background: url[(](.*)[)] no-repeat center center;background-size: cover;position: relative;'
        )
        found = style_pattern.match(img)
        # Group 1 is the text between the parentheses of url(...).
        return found.group(1)
Пример #3
0
class CourseSeats(Item):
    """Class number and seat counts declared for a course listing.

    ``total_seats`` and ``open_seats`` share one selector, so both cleaners
    receive the same raw text and each picks a different
    whitespace-separated token from it.

    Each cleaner splits the value it is handed instead of memoizing the
    split on ``self``. A memoized split is never invalidated, so it could
    leak the numbers of a previously parsed element into later ones whenever
    ``self`` is shared or reused between elements.
    """

    class_num = Text(".classNbrColumnValue > .course-details-link")
    total_seats = Text(".availableSeatsColumnValue")
    # Same selector object as total_seats: both numbers come from one element.
    open_seats = total_seats

    def clean_class_num(self, value):
        """Return *value* with leading and trailing whitespace removed."""
        return value.strip()

    def clean_total_seats(self, value):
        """Return the third whitespace-separated token of *value* as an int.

        NOTE(review): presumably the text looks like "<open> of <total>" --
        confirm against the page.

        Raises:
            IndexError: if *value* has fewer than three tokens.
            ValueError: if the token is not an integer.
        """
        return int(value.split()[2])

    def clean_open_seats(self, value):
        """Return the first whitespace-separated token of *value* as an int.

        Raises:
            IndexError: if *value* is empty or whitespace only.
            ValueError: if the token is not an integer.
        """
        return int(value.split()[0])
Пример #4
0
class ClassSeats(Item):
    """Seat counts declared for the side panel of a class details page.

    ``total_seats`` and ``open_seats`` share one selector, so both cleaners
    receive the same raw text and each picks a different
    whitespace-separated token from it.

    Each cleaner splits the value it is handed instead of memoizing the
    split on ``self``. A memoized split is never invalidated, so it could
    leak the numbers of a previously parsed element into later ones whenever
    ``self`` is shared or reused between elements.
    """

    total_seats = Text("#details-side-panel > span")
    # Same selector object as total_seats: both numbers come from one element.
    open_seats = total_seats

    def clean_total_seats(self, value):
        """Return the fifth whitespace-separated token of *value* as an int.

        Raises:
            IndexError: if *value* has fewer than five tokens.
            ValueError: if the token is not an integer.
        """
        return int(value.split()[4])

    def clean_open_seats(self, value):
        """Return the third whitespace-separated token of *value* as an int.

        Raises:
            IndexError: if *value* has fewer than three tokens.
            ValueError: if the token is not an integer.
        """
        return int(value.split()[2])
Пример #5
0
class Class(Item):
    """Department, course and title fields, all taken from the ``h2`` text.

    The three fields share one selector, so every cleaner receives the same
    heading text and extracts its own part by whitespace-separated position:
    token 0 is the department, token 1 the course, and tokens 3 onwards the
    title (token 2 is skipped).

    Each cleaner splits the value it is handed instead of memoizing the
    split on ``self``. A memoized split is never invalidated, so it could
    leak the heading of a previously parsed element into later ones whenever
    ``self`` is shared or reused between elements. The debug ``print`` that
    used to run on the first split has been removed.
    """

    department = Text("h2")
    # Same selector object as department: all three parts live in one <h2>.
    course = department
    title = department
    # Fields below are not currently extracted.
    # school = Text(".row > .col-md-7 > span > a")
    # instructor = Attr(".nametip", "title")

    def clean_department(self, value):
        """Return the first whitespace-separated token of *value*.

        Raises:
            IndexError: if *value* is empty or whitespace only.
        """
        return value.split()[0]

    def clean_course(self, value):
        """Return the second whitespace-separated token of *value*.

        Raises:
            IndexError: if *value* has fewer than two tokens.
        """
        return value.split()[1]

    def clean_title(self, value):
        """Return tokens 3 onwards of *value*, joined by single spaces.

        Yields an empty string when *value* has three or fewer tokens.
        """
        return ' '.join(value.split()[3:])
Пример #6
0
import requests
from htmlparsing import Element, HTMLParsing, Text, Attr, Parse

# Fetch a locally served page and extract one record (title, points, link).
url = 'http://localhost:8082/home/serveList.html'
# Without a timeout, requests waits indefinitely when the server never
# answers; bound the wait so the script fails fast instead of hanging.
r = requests.get(url, timeout=10)
article_detail = HTMLParsing(r.text).detail({
    'title':
    Text('a.storylink'),
    # NOTE(review): presumably '{}' marks the part of the element's content
    # to capture -- confirm against the htmlparsing documentation.
    'points':
    Parse('span.score', '>{} points'),
    'link':
    Attr('a.storylink', 'href')
})
print(article_detail)
Пример #7
0
class Post(Item):
    """Field declarations for a post: a link and a title.

    ``url`` reads the ``href`` attribute of the ``.read-more`` element and
    ``title`` is declared on the ``<a>`` directly inside an ``<h1>``.
    """

    url = Attr('.read-more', 'href')
    title = Text('h1 > a')
Пример #8
0
 class Post(Item):
     """Field declarations for a post, with ``url`` naming an unusual attribute.

     NOTE(review): 'no this attribute' is not a valid HTML attribute name;
     this looks like a deliberate fixture for the missing-attribute case --
     confirm against the surrounding test.
     """

     url = Attr('.storylink', 'no this attribute')
     title = Text('.storylink')
Пример #9
0
 class Post(Item):
     """Field declarations for a post built from the ``.storylink`` element.

     ``url`` reads that element's ``href`` attribute; ``title`` is declared
     on the same selector.
     """

     url = Attr('.storylink', 'href')
     title = Text('.storylink')
Пример #10
0
class Post(Item):
    """Field declarations for a post built from the ``.storylink`` element.

    ``url`` reads that element's ``href`` attribute; ``title`` is declared
    on the same selector.
    """

    url = Attr(".storylink", "href")
    title = Text(".storylink")
Пример #11
0
 class Post(Item):
     """Field declarations for a post, with ``url`` naming an unusual attribute.

     NOTE(review): "no this attribute" is not a valid HTML attribute name;
     this looks like a deliberate fixture for the missing-attribute case --
     confirm against the surrounding test.
     """

     url = Attr(".storylink", "no this attribute")
     title = Text(".storylink")
Пример #12
0
class Course(Item):
    """Field declarations for a course: a link and a heading.

    ``url`` reads the ``href`` attribute of an ``<a>`` element and ``title``
    is declared on an ``<h4>`` element.
    """

    url = Attr('a', 'href')
    title = Text('h4')
Пример #13
0
class Ingredients(Item):
    """Field declarations for one ingredient entry: a name and a weight."""

    # NOTE(review): 'span' also matches 'span.right' (used by weight below);
    # presumably the first matching span is used -- confirm against the markup.
    ingredient = Text('span')
    weight = Text('span.right')
Пример #14
0
class Step(Item):
    """Field declarations for one step: an image and a paragraph.

    ``img`` reads the ``src`` attribute of an ``<img>`` element and ``step``
    is declared on a ``<p>`` element.
    """

    img = Attr('img', 'src')
    step = Text('p')
Пример #15
0
class Selected(Item):
    """Field declarations for a selected entry: title, image and link.

    ``title`` and ``url`` are both declared on the ``.name`` element: one
    through ``Text``, the other through its ``href`` attribute.
    """

    title = Text('.name')
    img = Attr('a > img', 'src')
    url = Attr('.name', 'href')
Пример #16
0
# NOTE(review): this string appears to record the output of a statement that
# precedes this excerpt.
"""<Result ('Skip to content',) {}>"""

# Read the same element three ways: text, raw HTML and Markdown.
# NOTE(review): `e` is created earlier in the example, outside this excerpt.
# The bare string after each print appears to record the expected output.
print(e.xpath('//a')[5].text)
"""PyPI"""

print(e.xpath('//a')[5].html)
"""<a href="https://pypi.python.org/" title="Python Package Index">PyPI</a>"""

print(e.xpath('//a')[5].markdown)
"""[PyPI](https://pypi.python.org/ "Python Package Index")"""

# Fetch the front page and extract a title and link for each '.athing' element.
url = 'https://news.ycombinator.com/'
# Without a timeout, requests waits indefinitely when the server never
# answers; bound the wait so the script fails fast instead of hanging.
r = requests.get(url, timeout=10)
article_list = HTMLParsing(r.text).list('.athing', {
    'title': Text('a.storylink'),
    'link': Attr('a.storylink', 'href')
})
print(article_list)

# Fetch a single item page and extract one record (title, points, link).
url = 'https://news.ycombinator.com/item?id=16476454'
# Without a timeout, requests waits indefinitely when the server never
# answers; bound the wait so the script fails fast instead of hanging.
r = requests.get(url, timeout=10)
article_detail = HTMLParsing(r.text).detail({
    'title':
    Text('a.storylink'),
    # NOTE(review): presumably '{}' marks the part of the element's content
    # to capture -- confirm against the htmlparsing documentation.
    'points':
    Parse('span.score', '>{} points'),
    'link':
    Attr('a.storylink', 'href')
})
print(article_detail)