Python __parse примеры использования

Язык программирования: Python

Пространство имен/Пакет: lxml.html

Метод/Функция: __parse

Примеров на hotexamples.com: 8

Python __parse - 8 примеров найдено. Это лучшие примеры Python кода для lxml.html.__parse, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: scrape.py Проект: thethiny/BackEnd

def term(page):
    """Scrape the courses of a term from a page of "datadisplaytable" tables.

    The tables are consumed two at a time: the first table of each pair
    supplies the caption and the summary cells, the second supplies the rows
    that are handed to ``__get_data``.

    Returns a dictionary keyed by course key (the middle part of the caption
    with its spaces removed). When a course key shows up again, that entry is
    stored under the "lab" key of the course already recorded for it.
    """
    courses = {}
    tables = __parse(page).findall(".//table[@class='datadisplaytable']")
    # Pull from one shared iterator twice per step so that consecutive tables
    # are paired up as (head, body); an unpaired trailing table is dropped
    pairs = zip(*[iter(tables)] * 2)
    for header, content in pairs:
        # Caption is split into exactly three parts: title, key and section
        title, key, section = header.find("caption").text.split(" - ")
        key = key.replace(" ", "")
        head_cells = header.findall(".//td")
        body_rows = content.findall("tr")
        summary = {
            "title": title,
            "section": section,
            "crn": int(head_cells[1].text),
            # Only the first character of the stripped cell is converted
            "ch": int(head_cells[5].text.strip()[0]),
        }
        # Lecture/lab details are merged on top of the summary
        details = __get_data(body_rows, title)
        entry = dict(summary, **details)
        if key in courses:
            # Key already seen: attach this entry as the lab of that course
            courses[key]["lab"] = entry
        else:
            courses[key] = entry
    return courses

Пример #2

Показать файл

Файл: scrape.py Проект: thethiny/BackEnd

def course(page):
    """Scrape a single course page into a ``{course key: course data}`` dict.

    The first "datadisplaytable" table must contain exactly two direct rows:
    a caption row and a body row. The course key is the third part of the
    caption with its spaces removed.
    """
    table = __parse(page).find(".//table[@class='datadisplaytable']")
    caption, body = table.findall("tr")
    # Caption is split into exactly four parts: title, crn, key and section
    parts = caption.text_content().strip().split(" - ")
    title, crn, course_key, section = parts
    key = course_key.replace(" ", "")
    summary = {"title": title, "section": section, "crn": int(crn)}
    # Credit hours have no dedicated cell, so take the first text node that
    # mentions "Credits" and convert its first non-blank character
    credits_text = body.xpath("td/text()[contains(., 'Credits')]")[0]
    summary["ch"] = int(credits_text.strip()[0])
    # Lecture/lab details are merged on top of the summary
    details = __get_data(body.findall(".//tr"), title)
    return {key: dict(summary, **details)}

Пример #3

Показать файл

Файл: scrape.py Проект: thethiny/BackEnd

def registered_terms(page):
    """Build a ``{term code: term name}`` dictionary from the page.

    Term names are the text of every ``span`` whose class is
    "fieldOrangetextbold"; each code comes from ``__get_term_code``.
    """
    terms = {}
    spans = __parse(page).findall(".//span[@class='fieldOrangetextbold']")
    for span in spans:
        name = span.text
        terms[__get_term_code(name)] = name
    return terms

Пример #4

Показать файл

Файл: homepage.py Проект: thethiny/BackEnd

def all_terms(response):
    """Build a ``{term name: term code}`` dictionary from the calendar page.

    Each calendar label must split on whitespace into exactly three words:
    the season, an ignored middle word, and the year. The name is the season
    followed by the year with "/" replaced by "-"; the code is the first four
    characters of the year followed by ``seasons_codes[season]``.
    """
    calendars = __parse(response).findall(".//div[@class='pageTurn']/div/div")
    result = {}
    for calendar in calendars:
        season, _, year = calendar.find("label").text.split()
        # Resolve the code first so an unknown season fails before the key
        # is built
        code = year[:4] + seasons_codes[season]
        name = f"{season} {year.replace('/', '-')}"
        result[name] = code
    return result

Пример #5

Показать файл

Файл: scrape.py Проект: thethiny/BackEnd

def student_details(page):
    """Extract the student's term code, college and major from the page.

    Values are read by position from the cells of the "datadisplaytable"
    tables: cell 2 feeds ``__get_term_code``, cell 4 is the college, and
    cells 3 and 5 are joined with " in " to form the major.
    """
    cells = __parse(page).findall(".//table[@class='datadisplaytable']/tr/td")
    term_code = __get_term_code(cells[2].text)
    college = cells[4].text
    level, field = cells[3].text, cells[5].text
    # Drop the "Undergraduate in " prefix wherever it appears in the result
    major = f"{level} in {field}".replace("Undergraduate in ", "")
    return {"term": term_code, "college": college, "major": major}

Пример #6

Показать файл

Файл: scrape.py Проект: thethiny/BackEnd

def holds(page):
    """List the holds found in the "datadisplaytable" rows of the page.

    The first matched row is skipped. Every remaining row yields a dictionary
    with the hold's type, start, end and reason, read from cells 0, 1, 2 and 4.
    """
    rows = __parse(page).findall(".//table[@class='datadisplaytable']/tr")
    return [
        {
            "type": cells[0].text,
            "start": cells[1].text,
            "end": cells[2].text,
            "reason": cells[4].text,
        }
        for cells in (row.findall("td") for row in rows[1:])
    ]

Пример #7

Показать файл

Файл: scrape.py Проект: thethiny/BackEnd

def final_exams(page):
    """List the final exams whose date has been announced.

    Rows come from the ``[2]``-indexed "datadisplaytable" match, minus the
    first one. A row is skipped when it has no cells or when its date cell is
    empty or made up only of asterisks.
    """
    rows = __parse(page).findall(".//table[@class='datadisplaytable'][2]/tr")
    exams = []
    for row in rows[1:]:
        cells = row.findall("td")
        if not cells:
            continue
        date = cells[2].text
        # A date consisting solely of "*" characters is not announced yet
        if all(char == "*" for char in date):
            continue
        exams.append({
            "course": cells[0].text,
            "date": date,
            "start": cells[3].text,
            "end": cells[4].text,
            "location": __extract_location(cells[5].text.split()),
        })
    return exams

Пример #8

Показать файл

Файл: homepage.py Проект: thethiny/BackEnd

def term_events(response, term_code):
    """Collect the calendar events of a single term.

    Args:
        response: Page handed to ``__parse``.
        term_code: Term code whose first four characters are the starting
            year and whose remainder is looked up in ``seasons_codes``.

    Returns:
        A list of ``{"date": ..., "text": ...}`` dictionaries, one per event
        row of the first calendar whose label equals the term name. The list
        is empty when no calendar matches, so callers always receive a list.
    """
    def clean(cell):
        # Trim the cell and collapse runs of spaces into a single space
        return re.sub(r" +", " ", cell.strip())

    year = term_code[:4]
    # Label format is "<season name> <year>/<year + 1>", e.g. "... 2017/2018"
    term_name = f"{seasons_codes[term_code[4:]]} {year}/{int(year) + 1}"
    for term in __parse(response).findall(".//div[@class='pageTurn']/div/div"):
        if term.find("label").text != term_name:
            continue
        events = []
        # The first two table rows are skipped (presumably header rows)
        for event in term.findall(".//tbody/tr")[2:]:
            cells = event.findall("td")
            # Normalise range separators to " - ", then keep only the part
            # before the first comma
            date = re.sub(r"(Till|-|–)", " - ", cells[1].text_content())
            events.append({
                "date": clean(date).split(",")[0],
                "text": clean(cells[3].text_content()),
            })
        return events
    # No calendar matched the requested term: return an empty list instead of
    # falling off the end of the function and returning None
    return []