def parse_week(url, canteen):
    """Parse the weekly plan at *url* and feed every meal into *canteen*."""
    document = parse(urlopen(url).read())
    for day_table in document.find_all('table', 'speiseplan'):
        try:
            date = extractDate(day_table.thead.tr.th.text)
        except ValueError:
            # There was no valid date in the table header, which happens eg
            # for special "Aktionswoche" tables.
            # TODO: check if this table contains any meals, which was not the
            # case when it was used for the first time.
            continue
        if day_table.find('td', 'keinangebot'):
            canteen.setDayClosed(date)
            continue
        for row in day_table.tbody.children:
            # Rows without any link are decoration, not meals.
            if not (row.find_all('a') or []):
                continue
            raw = row.td.text
            if ': ' in raw:
                category, name = raw.split(': ', 1)
            else:
                category, name = 'Angebote', raw
            # Keep overly long names readable.
            if len(name) > 200:
                name = name[:200] + ' ...'
            notes = [img['title'] for img in row.contents[1].find_all('img')]
            canteen.addMeal(date, category, name, notes,
                            price_regex.findall(row.contents[2].text), roles)
def parse_week(url, canteen):
    """Read the weekly menu page and register its meals with *canteen*."""
    document = parse(urlopen(url).read())
    for table in document.find_all("table", "speiseplan"):
        try:
            day = extractDate(table.thead.tr.th.text)
        except ValueError:
            # There was no valid date in the table header, which happens eg
            # for special "Aktionswoche" tables.
            # TODO: check if this table contains any meals, which was not the
            # case when it was used for the first time.
            continue
        if table.find("td", "keinangebot"):
            canteen.setDayClosed(day)
            continue
        for entry in table.tbody.children:
            # Skip rows that carry no meal link at all.
            if len(entry.find_all("a") or []) < 1:
                continue
            text = entry.td.text
            if ": " in text:
                category, name = text.split(": ", 1)
            else:
                category = "Angebote"
                name = text
            if len(name) > 200:  # truncate very long meal names
                name = name[:200] + " ..."
            notes = [img["title"] for img in entry.contents[1].find_all("img")]
            prices = price_regex.findall(entry.contents[2].text)
            canteen.addMeal(day, category, name, notes, prices, roles)
def parse_week(url, canteen):
    """Parse the card-based weekly plan at *url* into *canteen*."""
    data = urlopen(url).read().decode('utf-8')
    document = parse(data, 'lxml')
    # The day plans live in a div without any class or id, so locate each
    # week by its "Speiseplan ..." heading and work from its parent.
    for week_heading in document(class_='swdd-ueberschrift',
                                 text=speiseplan_regex):
        week_div = week_heading.parent
        # The meals of a day sit in a card; again nothing selectable, so
        # find every card-header (which carries the date) and use its parent.
        for card_header in week_div.find_all(class_='card-header'):
            day_card = card_header.parent
            try:
                date = extractDate(card_header.text)
            except ValueError:
                # No valid date in the card header, which happens eg for
                # special "Aktionswoche" cards.
                # TODO: check if this card contains any meals, which was not
                # the case when it was used for the first time.
                continue
            # A "kein Angebot" item means the canteen is closed that day.
            if day_card.find(class_='list-group-item',
                             text=kein_angebot_regex):
                canteen.setDayClosed(date)
                continue
            # Each list-group-item inside the card is one meal.
            for item in day_card.find_all(class_='list-group-item'):
                span = item.find(name='span')
                if span is None:
                    continue
                name = span.text
                if ': ' in name:
                    category, name = name.split(': ', 1)
                else:
                    category = 'Angebote'
                notes = [img['alt']
                         for img in item.find_all(class_='swdd-spl-symbol')]
                # A trailing "* ..." part of the name is really a note.
                if '* ' in name:
                    name, extra = name.split('* ', 1)
                    notes.append(extra)
                if item.strong is not None:
                    prices = price_regex.findall(item.strong.text)
                else:
                    prices = []
                canteen.addMeal(date, category, name, notes, prices, roles)
def parse_week(url, canteen, type, allergene=None, zusatzstoffe=None):
    """Parse the weekly plan at *url* and add meals for *type* to *canteen*.

    ``allergene`` and ``zusatzstoffe`` map the site's short codes to
    human-readable note texts.  Their defaults are ``None`` sentinels
    instead of mutable ``{}`` literals (same observable behaviour, avoids
    the shared-mutable-default pitfall).
    """
    allergene = {} if allergene is None else allergene
    zusatzstoffe = {} if zusatzstoffe is None else zusatzstoffe
    document = parse(urlopen(url).read(), 'lxml')
    for day_table in document.find_all('table', 'swbs_speiseplan'):
        caption = day_table.find('th', 'swbs_speiseplan_head').text
        if type not in caption:
            continue
        date = extractDate(caption)
        meals = day_table.find_all('tr')
        pos = 0
        while pos < len(meals):
            meal_tr = meals[pos]
            if not meal_tr.find('td'):  # z.B Headline
                pos += 1
                continue
            tds = meal_tr.find_all('td')
            # Drop footnote markers like " (1)" from the category text.
            category = re.sub(r' \(\d\)', '', tds[0].text.strip())
            name = tds[1].text.strip()
            if tds[1].find('a', href='http://www.stw-on.de/mensavital'):
                notes = ['MensaVital']
            else:
                notes = []
            for img in tds[2].find_all('img'):
                title = img['title']
                if ':' in title:
                    # Split only on the first colon so a value containing
                    # further colons cannot raise ValueError.
                    kind, value = title.split(':', 1)
                    if kind == 'Allergene':
                        for allergen in value.split(','):
                            key = allergen.strip()
                            # Fall back to the key without its trailing
                            # letter suffix (e.g. "27a" -> "27").
                            notes.append(allergene.get(key)
                                         or allergene[key[:-1]])
                    elif kind == 'Zusatzstoffe':
                        for zusatzstoff in value.split(','):
                            notes.append(zusatzstoffe[zusatzstoff.strip()])
                    else:
                        print('Unknown image type "{}"'.format(kind))
                else:
                    notes.append(title.replace('enthält ', ''))
            prices = {
                'student': tds[3].text.strip(),
                'employee': tds[4].text.strip(),
                'other': tds[5].text.strip()
            }
            # A follow-up row whose first cell is empty carries extra note
            # images belonging to the current meal.
            if pos < len(meals) - 1:
                nextTds = meals[pos + 1].find_all('td')
                if nextTds[0].text.strip() == '':
                    pos += 1
                    for img in nextTds[1].find_all('img'):
                        notes.append(img['title'])
            pos += 1
            canteen.addMeal(date, category or 'Sonstiges', name, notes,
                            prices)
def parse_week(url, canteen, type, allergene=None, zusatzstoffe=None):
    """Parse the weekly plan at *url*, adding meals matching *type*.

    ``allergene``/``zusatzstoffe`` translate the site's short codes into
    note texts.  ``None`` sentinels replace the former mutable ``{}``
    defaults (behaviour unchanged; avoids shared mutable defaults).
    """
    if allergene is None:
        allergene = {}
    if zusatzstoffe is None:
        zusatzstoffe = {}
    document = parse(urlopen(url).read(), 'lxml')
    for day_table in document.find_all('table', 'swbs_speiseplan'):
        caption = day_table.find('th', 'swbs_speiseplan_head').text
        if type not in caption:
            continue
        date = extractDate(caption)
        meals = day_table.find_all('tr')
        pos = 0
        while pos < len(meals):
            meal_tr = meals[pos]
            if not meal_tr.find('td'):  # z.B Headline
                pos += 1
                continue
            tds = meal_tr.find_all('td')
            # Remove footnote markers such as " (1)" from the category.
            category = re.sub(r' \(\d\)', '', tds[0].text.strip())
            name = tds[1].text.strip()
            if tds[1].find('a', href='http://www.stw-on.de/mensavital'):
                notes = ['MensaVital']
            else:
                notes = []
            for img in tds[2].find_all('img'):
                title = img['title']
                if ':' in title:
                    # maxsplit=1: values containing another colon must not
                    # blow up the unpacking.
                    kind, value = title.split(':', 1)
                    if kind == 'Allergene':
                        for allergen in value.split(','):
                            key = allergen.strip()
                            # Retry without a trailing letter ("27a" -> "27").
                            notes.append(allergene.get(key)
                                         or allergene[key[:-1]])
                    elif kind == 'Zusatzstoffe':
                        for zusatzstoff in value.split(','):
                            notes.append(zusatzstoffe[zusatzstoff.strip()])
                    else:
                        print('Unknown image type "{}"'.format(kind))
                else:
                    notes.append(title.replace('enthält ', ''))
            prices = {
                'student': tds[3].text.strip(),
                'employee': tds[4].text.strip(),
                'other': tds[5].text.strip()
            }
            # A continuation row (empty first cell) holds additional note
            # images for the meal just parsed.
            if pos < len(meals) - 1:
                nextTds = meals[pos + 1].find_all('td')
                if nextTds[0].text.strip() == '':
                    pos += 1
                    for img in nextTds[1].find_all('img'):
                        notes.append(img['title'])
            pos += 1
            canteen.addMeal(date, category or 'Sonstiges', name, notes,
                            prices)
def parse_week(url, canteen, type, allergene=None, zusatzstoffe=None):
    """Parse the weekly plan at *url* and add meals for *type* to *canteen*.

    ``allergene``/``zusatzstoffe`` map short codes to note texts.  The
    defaults are ``None`` sentinels rather than mutable ``{}`` literals
    (same behaviour for every caller, avoids shared mutable defaults).
    """
    allergene = {} if allergene is None else allergene
    zusatzstoffe = {} if zusatzstoffe is None else zusatzstoffe
    document = parse(urlopen(url).read())
    for day_table in document.find_all("table", "swbs_speiseplan"):
        caption = day_table.find("th", "swbs_speiseplan_head").text
        if type not in caption:
            continue
        date = extractDate(caption)
        meals = day_table.find_all("tr")
        pos = 0
        while pos < len(meals):
            meal_tr = meals[pos]
            if not meal_tr.find("td"):  # z.B Headline
                pos += 1
                continue
            tds = meal_tr.find_all("td")
            # Strip footnote markers like " (1)" from the category text.
            category = re.sub(r" \(\d\)", "", tds[0].text.strip())
            name = tds[1].text.strip()
            if tds[1].find("a", href="http://www.stw-on.de/mensavital"):
                notes = ["MensaVital"]
            else:
                notes = []
            for img in tds[2].find_all("img"):
                title = img["title"]
                if ":" in title:
                    # Split on the first colon only, so values with further
                    # colons cannot raise ValueError during unpacking.
                    kind, value = title.split(":", 1)
                    if kind == "Allergene":
                        for allergen in value.split(","):
                            key = allergen.strip()
                            # Fall back to the key without its trailing
                            # letter suffix (e.g. "27a" -> "27").
                            notes.append(allergene.get(key)
                                         or allergene[key[:-1]])
                    elif kind == "Zusatzstoffe":
                        for zusatzstoff in value.split(","):
                            notes.append(zusatzstoffe[zusatzstoff.strip()])
                    else:
                        print('Unknown image type "{}"'.format(kind))
                else:
                    notes.append(title.replace("enthält ", ""))
            prices = {"student": tds[3].text.strip(),
                      "employee": tds[4].text.strip(),
                      "other": tds[5].text.strip()}
            # A follow-up row with an empty first cell carries extra note
            # images for the same meal.
            if pos < len(meals) - 1:
                nextTds = meals[pos + 1].find_all("td")
                if nextTds[0].text.strip() == "":
                    pos += 1
                    for img in nextTds[1].find_all("img"):
                        notes.append(img["title"])
            pos += 1
            canteen.addMeal(date, category, name, notes, prices)
def parse_week(url, canteen):
    """Parse the weekly menu page at *url* and add its meals to *canteen*.

    Fix: guard ``extractDate`` with try/except ValueError — table headers
    without a valid date (e.g. special "Aktionswoche" tables) previously
    raised and aborted parsing of the whole week.
    """
    document = parse(urlopen(url).read())
    for day_table in document.find_all('table', 'speiseplan'):
        try:
            date = extractDate(day_table.thead.tr.th.text)
        except ValueError:
            # No valid date in the table header; skip this table instead of
            # letting the exception kill the remaining days.
            continue
        if day_table.find('td', 'keinangebot'):
            canteen.setDayClosed(date)
            continue
        for meal_tr in day_table.tbody.children:
            # Rows without any link carry no meal.
            if len(meal_tr.find_all('a') or []) < 1:
                continue
            name = meal_tr.td.text
            if ': ' in name:
                category, name = name.split(': ', 1)
            else:
                category = 'Angebote'
            if len(name) > 200:  # truncate overly long meal names
                name = name[:200] + ' ...'
            notes = []
            for img in meal_tr.contents[1].find_all('img'):
                notes.append(img['title'])
            canteen.addMeal(date, category, name, notes,
                            price_regex.findall(meal_tr.contents[2].text),
                            roles)
def parse_dish(dish, canteen):
    """Extract a single dish element and register it with *canteen*."""
    date = extractDate(dish['data-date'])
    title = dish.find(class_='neo-menu-single-title')
    if title is None:
        return
    notes = {abbr['title'] for abbr in title.find_all(name='abbr')}
    name = re.sub(notes_regex, '', title.text.strip())
    if not name:
        return
    # Fix formatting artefacts left over from stripping the notes:
    name = re.sub(whitspace_regex, ' ', name)            # multiple whitespace
    name = re.sub(comma_regex, ', ', name.strip(', '))   # space after comma
    name = re.sub(bracket_regex, ' (', name)
    category = dish.find(class_='neo-menu-single-type')
    if category is not None:
        category = category.text
    else:
        heading = dish.find_previous(name='h2')
        if heading is not None:
            # A side dish: categorise under its preceding heading.
            category = 'Beilagen: ' + heading.text.capitalize()
        else:
            # Just in case nothing matched.
            category = 'Unbekannt'
    price = dish.find(class_='neo-menu-single-price')
    prices = price_regex.findall(price.text) if price is not None else {}
    canteen.addMeal(date, category, name, notes, prices, roles)
def parse_week(url, canteen, type):
    """Parse the weekly plan at *url*, adding meals whose caption matches *type*."""
    document = parse(urlopen(url).read())
    for day_table in document.find_all('table', 'swbs_speiseplan'):
        caption = day_table.find('th', 'swbs_speiseplan_head').text
        if type not in caption:
            continue
        date = extractDate(caption)
        for row in day_table.find_all('tr'):
            if not row.find('td'):  # z.B Headline
                continue
            cells = row.find_all('td')
            category = cells[0].text.strip()
            name = cells[1].text
            is_mensavital = cells[1].find(
                'a', href='http://www.stw-on.de/mensavital')
            notes = ['MensaVital'] if is_mensavital else []
            prices = {
                'student': cells[2].text,
                'employee': cells[3].text,
                'other': cells[4].text
            }
            canteen.addMeal(date, category, name, notes, prices)
def parse_dish(dish, canteen):
    """Parse one dish node and add the resulting meal to *canteen*."""
    date = extractDate(dish['data-date'])
    title_tag = dish.find(class_='neo-menu-single-title')
    if title_tag is None:
        return
    notes = set(abbr['title'] for abbr in title_tag.find_all(name='abbr'))
    name = re.sub(notes_regex, '', title_tag.text.strip())
    if len(name) == 0:
        return
    # Clean up formatting issues introduced by removing the note markers:
    name = re.sub(whitspace_regex, ' ', name)           # collapse whitespace
    name = re.sub(comma_regex, ', ', name.strip(', '))  # whitespace after comma
    name = re.sub(bracket_regex, ' (', name)
    type_tag = dish.find(class_='neo-menu-single-type')
    if type_tag is not None:
        category = type_tag.text
    elif dish.find_previous(name='h2') is not None:
        # A side dish: prefix with its preceding section heading.
        category = 'Beilagen: ' + dish.find_previous(name='h2').text.capitalize()
    else:
        # Just in case no category can be derived.
        category = 'Unbekannt'
    price_tag = dish.find(class_='neo-menu-single-price')
    if price_tag is not None:
        prices = price_regex.findall(price_tag.text)
    else:
        prices = {}
    canteen.addMeal(date, category, name, notes, prices, roles)
def test_unknown_date_format(self):
    """A string in no supported format must raise ValueError."""
    with pytest.raises(ValueError):
        extractDate('2050.11-24')
def test_d_mm_yyyy(self):
    """Single-digit day, zero-padded month, four-digit year."""
    assert extractDate('7.03.2013') == self.date
def test_dd_mm_yy(self):
    """Zero-padded day and month with a two-digit year."""
    assert extractDate('07.03.13') == self.date
def test_passing_of_date_objects(self):
    """A date object passes through unchanged (same identity)."""
    assert extractDate(self.date) is self.date
def test_yy_m_d(self):
    """Dash-separated two-digit year with unpadded month and day."""
    assert extractDate('13-3-7') == self.date
def test_yy_mm_dd(self):
    """Dash-separated two-digit year with zero-padded month and day."""
    assert extractDate('13-03-07') == self.date
def test_yyyy_m_dd(self):
    """Four-digit year, unpadded month, zero-padded day."""
    assert extractDate('2013-3-07') == self.date
def test_yyyy_mm_d(self):
    """Four-digit year, zero-padded month, unpadded day."""
    assert extractDate('2013-03-7') == self.date
def test_d_m_yy(self):
    """Dot-separated date with all components unpadded."""
    assert extractDate('7.3.13') == self.date
def test_unknown_month(self):
    """An unrecognised month name must raise ValueError."""
    with pytest.raises(ValueError):
        extractDate('07. Hans 2013')
def test_dd_DENAME_yyyy(self):
    """German month names (with/without umlaut and separating space)."""
    for variant in ('07 März 2013', '07 Maerz 2013',
                    '07März 2013', '07Maerz 2013'):
        assert extractDate(variant) == self.date
def test_ddDOT_ENNAME_yy(self):
    """English month name after a dotted day, two-digit year."""
    for variant in ('07. March 13', '07. march 13',
                    '07.March 13', '07.march 13'):
        assert extractDate(variant) == self.date
def test_dd_m_yyyy(self):
    """Zero-padded day, unpadded month, four-digit year."""
    assert extractDate('07.3.2013') == self.date
def test_dd_ENNAME_yy(self):
    """English month name without a dot, two-digit year."""
    for variant in ('07 March 13', '07 march 13',
                    '07March 13', '07march 13'):
        assert extractDate(variant) == self.date
def test_ddDOT_DENAME_yy(self):
    """German month name after a dotted day, two-digit year."""
    for variant in ('07. März 13', '07. Maerz 13',
                    '07.März 13', '07.Maerz 13'):
        assert extractDate(variant) == self.date