Примеры использования eparse в Python

Язык программирования: Python

Пространство имен/Пакет: lxml.html

Метод/Функция: eparse

Примеров на hotexamples.com: 8

Python eparse — найдено 8 примеров. Это лучшие примеры кода на Python для lxml.html.eparse, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Пример #1

Показать файл

Файл: parse.py Проект: mudaoyh/NewJobs

def parse(page):
    """Parse ``page`` with ``eparse`` and report unparseable content.

    Args:
        page: Markup passed straight to ``eparse``.

    Returns:
        An empty dict when ``eparse`` raises ``lxml.etree.ParserError``;
        otherwise ``None``, because nothing is returned after a successful
        parse in the code shown here.
    """
    # Bound before the try block so the error path has something to return.
    # Assigning it only after the handler made ``return info`` raise
    # UnboundLocalError.
    info = {}
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # Write to the stderr stream itself. ``print sys.stderr, "..."``
        # printed the stream object's repr to stdout instead.
        sys.stderr.write("page content error\n")
        return info
    # NOTE(review): neither ``t`` nor ``info`` is used past this point; the
    # rest of the function was presumably cut off in this excerpt -- confirm.

Пример #2

Показать файл

Файл: parse.py Проект: RyanWarm/NewJobs

def parse(page):
    """Run ``eparse`` over ``page`` and handle a parser failure.

    Args:
        page: Markup handed to ``eparse`` unchanged.

    Returns:
        ``{}`` if ``eparse`` raises ``lxml.etree.ParserError``. On success
        the visible code returns nothing, so the result is ``None``.
    """
    # ``info`` has to exist before the handler can return it; it used to be
    # created only after the try/except, so the error path itself crashed
    # with UnboundLocalError.
    info = {}
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # ``print sys.stderr, "..."`` sent the repr of the stream plus the
        # message to stdout; write the message to stderr directly.
        sys.stderr.write("page content error\n")
        return info
    # NOTE(review): the excerpt stops here without using ``t`` or ``info``;
    # presumably more parsing code follows in the real file -- confirm.

Пример #3

Показать файл

Файл: parse.py Проект: mudaoyh/NewJobs

def parse_csdn(page):
    """Extract link texts and education entries from a profile page.

    Args:
        page: Markup passed to ``eparse``.

    Returns:
        A list. When a ``per_dynamic`` div exists, the list starts with the
        whitespace-normalised text of each matched link (strings). It then
        holds one dict per education block, containing whichever of
        ``school``, ``degree``, ``major``, ``dtstart``, ``dtend``, ``desc``
        and ``activities`` were found. The list is empty when ``eparse``
        raises ``lxml.etree.ParserError``.
    """
    # Created up front so the error path can return it. The handler used to
    # return ``info``, a name this function never defines.
    r = []
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # Write to stderr itself; ``print sys.stderr, "..."`` printed the
        # stream object's repr to stdout.
        sys.stderr.write("page content error\n")
        return r

    find = t.cssselect('div[class="per_dynamic"]')
    if find:
        # NOTE(review): '//a' is an absolute path, so this matches anchors in
        # the whole document, not only inside the div. Use './/a' if only the
        # div's own links are wanted -- confirm intent before changing.
        titles = find[0].xpath('//a[contains(@href,"http:")]')
        for li in titles:
            r.append(li.text_content().strip().replace('\n', ' '))
    # Debug trace of the link texts; same output as ``print "======", r``.
    sys.stdout.write("====== %s\n" % (r,))

    # (selector, result key) pairs, applied in order. 'dtstamp' and 'dtend'
    # both write to 'dtend', so a 'dtend' element wins when both exist.
    fields = (
        ('h3[class="summary fn org"]', 'school'),
        ('span[class="degree"]', 'degree'),
        ('span[class="major"]', 'major'),
        ('abbr[class="dtstart"]', 'dtstart'),
        ('abbr[class="dtstamp"]', 'dtend'),
        ('abbr[class="dtend"]', 'dtend'),
        ('p[class=" desc details-education"]', 'desc'),
        ('p[class="desc details-education"]', 'activities'),
    )
    for li in t.cssselect('div[class="position  education vevent vcard"]'):
        item = {}
        for selector, key in fields:
            find = li.cssselect(selector)
            if find:
                # Only the first match is used for each field.
                item[key] = find[0].text_content().strip().replace('\n', ' ')
        r.append(item)
    return r

Пример #4

Показать файл

Файл: parseCsdn.py Проект: mudaoyh/NewJobs

def parse(page):
    """Build the info dict for a page from its profile and dynamic parts.

    Args:
        page: Markup passed to ``eparse``.

    Returns:
        Whatever ``parse_profile`` returns for the parsed tree, with the
        result of ``parse_dynamic`` stored under ``'dynamic'``. An empty
        dict is returned when ``eparse`` raises ``lxml.etree.ParserError``.
    """
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # Write to stderr itself; ``print sys.stderr, "..."`` printed the
        # stream object's repr to stdout.
        sys.stderr.write("page content error\n")
        return {}
    info = parse_profile(t)
    info['dynamic'] = parse_dynamic(t)
    return info

Пример #5

Показать файл

Файл: parseCsdn.py Проект: mudaoyh/NewJobs

def parse_dir(t):
    """Collect the ``href`` of every ``h2 strong a`` element in a page.

    Args:
        t: Markup to parse with ``eparse``.

    Returns:
        A list of ``href`` attribute values, in the order ``cssselect``
        yields the elements. The list is empty when ``eparse`` raises
        ``lxml.etree.ParserError``.

    Raises:
        KeyError: If a matched element has no ``href`` attribute.
    """
    try:
        # Parse the argument. The body used to parse ``page``, a name this
        # function never defines, and ignored its own parameter.
        # NOTE(review): assumes callers pass raw markup -- confirm.
        tree = eparse(t)
    except lxml.etree.ParserError:
        # Write to stderr itself; ``print sys.stderr, "..."`` printed the
        # stream object's repr to stdout.
        sys.stderr.write("page content error\n")
        return []
    return [li.attrib['href'] for li in tree.cssselect('h2 strong a')]

Пример #6

Показать файл

Файл: parse.py Проект: RyanWarm/NewJobs

def parse_csdn(page):
    """Pull link texts and education records out of a profile page.

    Args:
        page: Markup handed to ``eparse``.

    Returns:
        A list that mixes two kinds of entries. First come the
        whitespace-normalised texts of the matched links (only if a
        ``per_dynamic`` div exists). Then comes one dict per education
        block, with any of the keys ``school``, ``degree``, ``major``,
        ``dtstart``, ``dtend``, ``desc`` and ``activities`` that were found.
        An empty list is returned when ``eparse`` raises
        ``lxml.etree.ParserError``.
    """
    # The result list exists before parsing so a parse failure can return
    # it. The handler previously returned the undefined name ``info``.
    r = []
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # ``print sys.stderr, "..."`` wrote the stream's repr to stdout;
        # send the message to stderr directly.
        sys.stderr.write("page content error\n")
        return r

    find = t.cssselect('div[class="per_dynamic"]')
    if find:
        # NOTE(review): a leading '//' makes the XPath absolute, so anchors
        # from the entire document are returned, not just those under the
        # div. './/a' would scope it to the div -- confirm which is wanted.
        titles = find[0].xpath('//a[contains(@href,"http:")]')
        for li in titles:
            r.append(li.text_content().strip().replace('\n', ' '))
    # Debug trace; produces the same text as ``print "======", r``.
    sys.stdout.write("====== %s\n" % (r,))

    # Ordered (selector, key) table. Order matters: 'dtstamp' fills 'dtend'
    # first and a real 'dtend' element overwrites it afterwards.
    lookups = (
        ('h3[class="summary fn org"]', 'school'),
        ('span[class="degree"]', 'degree'),
        ('span[class="major"]', 'major'),
        ('abbr[class="dtstart"]', 'dtstart'),
        ('abbr[class="dtstamp"]', 'dtend'),
        ('abbr[class="dtend"]', 'dtend'),
        ('p[class=" desc details-education"]', 'desc'),
        ('p[class="desc details-education"]', 'activities'),
    )
    for li in t.cssselect('div[class="position  education vevent vcard"]'):
        item = {}
        for selector, key in lookups:
            find = li.cssselect(selector)
            if find:
                # Only the first match is used for each field.
                item[key] = find[0].text_content().strip().replace('\n', ' ')
        r.append(item)
    return r

Пример #7

Показать файл

Файл: parse_detail_info.py Проект: RyanWarm/NewJobs

def parse(page):
    """Assemble the detail info dict for a page.

    Args:
        page: Markup passed to ``eparse``.

    Returns:
        Whatever ``parse_addition`` returns for the parsed tree, extended
        with ``'work'`` (from ``parse_work``) and ``'edu'`` (from
        ``parse_edu``). An empty dict is returned when ``eparse`` raises
        ``lxml.etree.ParserError``.
    """
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # Write to stderr itself; ``print sys.stderr, "..."`` printed the
        # stream object's repr to stdout.
        sys.stderr.write("page content error\n")
        return {}
    info = parse_addition(t)
    info['work'] = parse_work(t)
    info['edu'] = parse_edu(t)
    return info

Пример #8

Показать файл

Файл: parse_detail_info.py Проект: mudaoyh/NewJobs

def parse(page):
    """Parse a page into its detail info dict.

    Args:
        page: Markup handed to ``eparse``.

    Returns:
        The value returned by ``parse_addition`` for the parsed tree, with
        the ``parse_work`` result under ``'work'`` and the ``parse_edu``
        result under ``'edu'``. If ``eparse`` raises
        ``lxml.etree.ParserError`` an empty dict is returned instead.
    """
    try:
        t = eparse(page)
    except lxml.etree.ParserError:
        # ``print sys.stderr, "..."`` sent the stream's repr and the message
        # to stdout; write the message to stderr directly.
        sys.stderr.write("page content error\n")
        return {}
    info = parse_addition(t)
    info['work'] = parse_work(t)
    info['edu'] = parse_edu(t)
    return info