Exemplo n.º 1
0
def parse_week(url, canteen):
    """Download the plan at ``url`` and feed its meals into ``canteen``.

    Every ``table`` with class ``speiseplan`` is handled as one day. Tables
    whose header text yields no date are skipped, and a table containing a
    ``td`` with class ``keinangebot`` marks that day as closed.
    """
    page = parse(urlopen(url).read())
    for table in page.find_all('table', 'speiseplan'):
        try:
            date = extractDate(table.thead.tr.th.text)
        except ValueError:
            # The header held no valid date, which happens e.g. for special
            # "Aktionswoche" tables.
            # TODO: check if this table contains any meals, which was not the
            #       case when it was used for the first time.
            continue
        if table.find('td', 'keinangebot'):
            canteen.setDayClosed(date)
            continue
        for row in table.tbody.children:
            # Skip rows that do not contain any <a> element.
            if not row.find_all('a'):
                continue
            name = row.td.text
            # Text before the first ": " is the category, the rest the name.
            head, separator, tail = name.partition(': ')
            if separator:
                category, name = head, tail
            else:
                category = 'Angebote'
            # Overlong names are cut to 200 characters plus an ellipsis.
            if len(name) > 200:
                name = name[:200] + ' ...'
            notes = [img['title'] for img in row.contents[1].find_all('img')]
            prices = price_regex.findall(row.contents[2].text)
            canteen.addMeal(date, category, name, notes, prices, roles)
Exemplo n.º 2
0
def parse_week(url, canteen):
    """Parse the weekly menu page at ``url`` into ``canteen``.

    One ``table`` with class ``speiseplan`` is processed per day: a missing
    date in the header skips the table, a ``td`` with class ``keinangebot``
    closes the day, otherwise each row containing a link becomes a meal.
    """
    soup = parse(urlopen(url).read())
    for day in soup.find_all("table", "speiseplan"):
        try:
            date = extractDate(day.thead.tr.th.text)
        except ValueError:
            # No valid date in the table header; seen e.g. for special
            # "Aktionswoche" tables.
            # TODO: check if this table contains any meals, which was not the
            #       case when it was used for the first time.
            continue
        if day.find("td", "keinangebot"):
            canteen.setDayClosed(date)
            continue
        for row in day.tbody.children:
            anchors = row.find_all("a")
            if not anchors:
                continue
            name = row.td.text
            # A leading "<category>: " prefix is split off the meal name.
            pieces = name.split(": ", 1)
            if len(pieces) == 2:
                category, name = pieces
            else:
                category = "Angebote"
            if len(name) > 200:
                name = name[:200] + " ..."
            notes = []
            for icon in row.contents[1].find_all("img"):
                notes.append(icon["title"])
            price_text = row.contents[2].text
            canteen.addMeal(date, category, name, notes, price_regex.findall(price_text), roles)
Exemplo n.º 3
0
def parse_week(url, canteen):
    """Fetch the page at ``url`` (decoded as UTF-8) and add its meals to ``canteen``.

    The page is searched for elements of class ``swdd-ueberschrift`` whose
    text matches ``speiseplan_regex``; inside each such element's parent,
    every ``card-header`` is read as the date of one day card.
    """
    html = urlopen(url).read().decode('utf-8')
    soup = parse(html, 'lxml')

    # The day plans sit in a div without a class or id of its own, so the
    # div is located through its "Speiseplan" heading.
    for heading in soup(class_='swdd-ueberschrift', text=speiseplan_regex):
        container = heading.parent

        # Day cards have no dedicated class or id either; they are found via
        # their card-header, which carries the date.
        for header in container.find_all(class_='card-header'):
            card = header.parent

            try:
                date = extractDate(header.text)
            except ValueError:
                # The header held no valid date, which happens e.g. for
                # special "Aktionswoche" cards.
                # TODO: check if this card contains any meals, which was not
                #       the case when it was used for the first time.
                continue

            # A list item matching kein_angebot_regex closes the day.
            if card.find(class_='list-group-item', text=kein_angebot_regex):
                canteen.setDayClosed(date)
                continue

            # Each list-group-item of the card is one candidate meal.
            for item in card.find_all(class_='list-group-item'):
                span = item.find(name='span')
                if span is None:
                    continue
                name = span.text

                head, separator, tail = name.partition(': ')
                if separator:
                    category, name = head, tail
                else:
                    category = 'Angebote'

                notes = []
                for symbol in item.find_all(class_='swdd-spl-symbol'):
                    notes.append(symbol['alt'])

                # Text after the first "* " in the name is kept as a note.
                if '* ' in name:
                    name, extra_note = name.split('* ', 1)
                    notes.append(extra_note)

                prices = (price_regex.findall(item.strong.text)
                          if item.strong is not None else [])

                canteen.addMeal(date, category, name, notes, prices, roles)
Exemplo n.º 4
0
def parse_week(url, canteen, type, allergene={}, zusatzstoffe={}):
    """Parse the weekly plan at ``url`` and add the matching meals to ``canteen``.

    Every ``table`` with class ``swbs_speiseplan`` is inspected; only tables
    whose ``th.swbs_speiseplan_head`` caption contains ``type`` are used.
    Each row with cells yields one meal. A following row whose first cell is
    empty is treated as a continuation of that meal and only contributes
    extra notes.

    Args:
        url: Address of the page to download.
        canteen: Object that receives the meals through ``addMeal``.
        type: Substring a table caption must contain to be processed.
        allergene: Mapping from allergen code to note text. Only read here,
            never mutated, so the shared default is harmless.
        zusatzstoffe: Mapping from additive code to note text. Only read.

    Raises:
        KeyError: If a code is missing from ``allergene`` / ``zusatzstoffe``
            (assuming plain dicts are passed).
    """
    document = parse(urlopen(url).read(), 'lxml')
    for day_table in document.find_all('table', 'swbs_speiseplan'):
        caption = day_table.find('th', 'swbs_speiseplan_head').text
        if type not in caption:
            continue
        date = extractDate(caption)
        rows = day_table.find_all('tr')
        pos = 0
        while pos < len(rows):
            tds = rows[pos].find_all('td')
            pos += 1  # from here on, pos refers to the row after this one
            if not tds:  # e.g. headline rows
                continue
            category = re.sub(r' \(\d\)', '', tds[0].text.strip())
            name = tds[1].text.strip()
            if tds[1].find('a', href='http://www.stw-on.de/mensavital'):
                notes = ['MensaVital']
            else:
                notes = []
            for img in tds[2].find_all('img'):
                title = img['title']
                if ':' not in title:
                    notes.append(title.replace('enthält ', ''))
                    continue
                # Split on the first colon only, so a colon inside the value
                # part no longer breaks the two-name unpacking.
                kind, value = title.split(':', 1)
                if kind == 'Allergene':
                    for allergen in value.split(','):
                        code = allergen.strip()
                        # Fall back to the code without its last character
                        # when the full code has no (truthy) entry.
                        notes.append(allergene.get(code) or allergene[code[:-1]])
                elif kind == 'Zusatzstoffe':
                    for zusatzstoff in value.split(','):
                        notes.append(zusatzstoffe[zusatzstoff.strip()])
                else:
                    print('Unknown image type "{}"'.format(kind))
            prices = {
                'student': tds[3].text.strip(),
                'employee': tds[4].text.strip(),
                'other': tds[5].text.strip(),
            }
            if pos < len(rows):
                next_tds = rows[pos].find_all('td')
                # A row without any cell (e.g. a headline) is not a
                # continuation; indexing it used to raise IndexError.
                if next_tds and next_tds[0].text.strip() == '':
                    pos += 1  # consume the continuation row
                    for img in next_tds[1].find_all('img'):
                        notes.append(img['title'])
            canteen.addMeal(date, category or 'Sonstiges', name, notes, prices)
Exemplo n.º 5
0
def parse_week(url, canteen, type, allergene={}, zusatzstoffe={}):
    """Read the weekly plan at ``url`` and register matching meals on ``canteen``.

    Only ``table.swbs_speiseplan`` elements whose ``th.swbs_speiseplan_head``
    caption contains ``type`` are processed. Each row with cells becomes one
    meal; a directly following row with an empty first cell is consumed as a
    continuation that only adds notes.

    Args:
        url: Address of the page to download.
        canteen: Object that receives the meals through ``addMeal``.
        type: Substring a table caption must contain to be processed.
        allergene: Mapping from allergen code to note text. It is only read,
            so sharing the default dict between calls has no effect.
        zusatzstoffe: Mapping from additive code to note text. Only read.

    Raises:
        KeyError: If a code is missing from ``allergene`` / ``zusatzstoffe``
            (assuming plain dicts are passed).
    """
    document = parse(urlopen(url).read(), 'lxml')
    for day_table in document.find_all('table', 'swbs_speiseplan'):
        caption = day_table.find('th', 'swbs_speiseplan_head').text
        if type not in caption:
            continue
        date = extractDate(caption)
        meals = day_table.find_all('tr')
        pos = 0
        while pos < len(meals):
            tds = meals[pos].find_all('td')
            pos += 1  # pos now indexes the row following the current one
            if not tds:  # e.g. headline rows
                continue
            category = re.sub(r' \(\d\)', '', tds[0].text.strip())
            name = tds[1].text.strip()
            if tds[1].find('a', href='http://www.stw-on.de/mensavital'):
                notes = ['MensaVital']
            else:
                notes = []
            for img in tds[2].find_all('img'):
                title = img['title']
                if ':' not in title:
                    notes.append(title.replace('enthält ', ''))
                    continue
                # maxsplit=1: a second colon in the title must not make the
                # unpacking fail with ValueError.
                kind, value = title.split(':', 1)
                if kind == 'Allergene':
                    for allergen in value.split(','):
                        code = allergen.strip()
                        # Retry without the last character when the full
                        # code has no (truthy) entry in the mapping.
                        notes.append(allergene.get(code) or allergene[code[:-1]])
                elif kind == 'Zusatzstoffe':
                    for zusatzstoff in value.split(','):
                        notes.append(zusatzstoffe[zusatzstoff.strip()])
                else:
                    print('Unknown image type "{}"'.format(kind))
            prices = {
                'student':  tds[3].text.strip(),
                'employee': tds[4].text.strip(),
                'other':    tds[5].text.strip()
            }
            if pos < len(meals):
                next_tds = meals[pos].find_all('td')
                # Guard against rows without cells (e.g. a headline), which
                # previously raised IndexError on next_tds[0].
                if next_tds and next_tds[0].text.strip() == '':
                    pos += 1  # the continuation row is consumed here
                    for img in next_tds[1].find_all('img'):
                        notes.append(img['title'])
            canteen.addMeal(date, category or 'Sonstiges', name, notes, prices)
Exemplo n.º 6
0
def parse_week(url, canteen, type, allergene={}, zusatzstoffe={}):
    """Parse the weekly plan at ``url`` and add the matching meals to ``canteen``.

    Only ``table.swbs_speiseplan`` elements whose ``th.swbs_speiseplan_head``
    caption contains ``type`` are processed. Each row with cells becomes one
    meal; a directly following row with an empty first cell is consumed as a
    continuation that only adds notes. The category is passed on as found,
    even when it is empty.

    Args:
        url: Address of the page to download.
        canteen: Object that receives the meals through ``addMeal``.
        type: Substring a table caption must contain to be processed.
        allergene: Mapping from allergen code to note text. Only read here,
            never mutated.
        zusatzstoffe: Mapping from additive code to note text. Only read.

    Raises:
        KeyError: If a code is missing from ``allergene`` / ``zusatzstoffe``
            (assuming plain dicts are passed).
    """
    document = parse(urlopen(url).read())
    for day_table in document.find_all("table", "swbs_speiseplan"):
        caption = day_table.find("th", "swbs_speiseplan_head").text
        if type not in caption:
            continue
        date = extractDate(caption)
        meals = day_table.find_all("tr")
        pos = 0
        while pos < len(meals):
            tds = meals[pos].find_all("td")
            pos += 1  # pos now indexes the row following the current one
            if not tds:  # e.g. headline rows
                continue
            category = re.sub(r" \(\d\)", "", tds[0].text.strip())
            name = tds[1].text.strip()
            if tds[1].find("a", href="http://www.stw-on.de/mensavital"):
                notes = ["MensaVital"]
            else:
                notes = []
            for img in tds[2].find_all("img"):
                title = img["title"]
                if ":" not in title:
                    notes.append(title.replace("enthält ", ""))
                    continue
                # Split at the first colon only; additional colons in the
                # value part used to raise ValueError on unpacking.
                kind, value = title.split(":", 1)
                if kind == "Allergene":
                    for allergen in value.split(","):
                        code = allergen.strip()
                        # Retry without the last character when the full
                        # code has no (truthy) entry in the mapping.
                        notes.append(allergene.get(code) or allergene[code[:-1]])
                elif kind == "Zusatzstoffe":
                    for zusatzstoff in value.split(","):
                        notes.append(zusatzstoffe[zusatzstoff.strip()])
                else:
                    print('Unknown image type "{}"'.format(kind))
            prices = {
                "student": tds[3].text.strip(),
                "employee": tds[4].text.strip(),
                "other": tds[5].text.strip(),
            }
            if pos < len(meals):
                next_tds = meals[pos].find_all("td")
                # A row without cells (e.g. a headline) is not a continuation;
                # indexing it used to raise IndexError.
                if next_tds and next_tds[0].text.strip() == "":
                    pos += 1  # consume the continuation row
                    for img in next_tds[1].find_all("img"):
                        notes.append(img["title"])
            canteen.addMeal(date, category, name, notes, prices)
Exemplo n.º 7
0
def parse_week(url, canteen):
    """Read the week plan behind ``url`` and register its meals on ``canteen``.

    Each ``table`` with class ``speiseplan`` is one day whose date comes from
    the table header. A ``td`` with class ``keinangebot`` closes the day;
    otherwise every row containing a link is added as a meal.
    """
    soup = parse(urlopen(url).read())
    for day in soup.find_all('table', 'speiseplan'):
        date = extractDate(day.thead.tr.th.text)
        if day.find('td', 'keinangebot'):
            canteen.setDayClosed(date)
            continue
        for row in day.tbody.children:
            links = row.find_all('a')
            if not links:
                continue
            name = row.td.text
            # Text before the first ": " is the category, the rest the name.
            head, separator, tail = name.partition(': ')
            if separator:
                category, name = head, tail
            else:
                category = 'Angebote'
            # Names longer than 200 characters are shortened.
            if len(name) > 200:
                name = name[:200] + ' ...'
            notes = [img['title'] for img in row.contents[1].find_all('img')]
            prices = price_regex.findall(row.contents[2].text)
            canteen.addMeal(date, category, name, notes, prices, roles)
Exemplo n.º 8
0
def parse_dish(dish, canteen):
    """Turn one ``dish`` element into a meal on ``canteen``.

    The date is read from the element's ``data-date`` attribute. Nothing is
    added when the title element is missing or the cleaned name is empty.
    """
    date = extractDate(dish['data-date'])

    title = dish.find(class_='neo-menu-single-title')
    if title is None:
        return
    # The titles of all <abbr> elements inside the dish title become notes.
    notes = {abbr['title'] for abbr in title.find_all(name='abbr')}

    name = re.sub(notes_regex, '', title.text.strip())
    if not name:
        return

    # Formatting clean-up; per the original notes this covers multiple
    # whitespace and missing whitespace after commas (the regexes are
    # defined outside this function).
    name = re.sub(whitspace_regex, ' ', name)
    name = re.sub(comma_regex, ', ', name.strip(', '))
    name = re.sub(bracket_regex, ' (', name)

    type_tag = dish.find(class_='neo-menu-single-type')
    if type_tag is not None:
        category = type_tag.text
    else:
        heading = dish.find_previous(name='h2')
        if heading is not None:
            # No type element: treated as a side, labelled with the
            # preceding <h2> heading.
            category = 'Beilagen: ' + heading.text.capitalize()
        else:
            # Fallback when neither is available.
            category = 'Unbekannt'

    price_tag = dish.find(class_='neo-menu-single-price')
    prices = {} if price_tag is None else price_regex.findall(price_tag.text)

    canteen.addMeal(date, category, name, notes, prices, roles)
Exemplo n.º 9
0
def parse_week(url, canteen):
    """Load the menu page at ``url`` and add its meals to ``canteen``.

    Days are the ``table`` elements with class ``speiseplan``; the date is
    taken from each table header. A ``td`` with class ``keinangebot`` marks
    the day as closed, otherwise rows containing a link are added as meals.
    """
    page = parse(urlopen(url).read())
    for table in page.find_all('table', 'speiseplan'):
        date = extractDate(table.thead.tr.th.text)
        if table.find('td', 'keinangebot'):
            canteen.setDayClosed(date)
            continue
        for entry in table.tbody.children:
            # Entries without any <a> element are skipped.
            if not entry.find_all('a'):
                continue
            name = entry.td.text
            pieces = name.split(': ', 1)
            if len(pieces) == 2:
                category, name = pieces
            else:
                category = 'Angebote'
            if len(name) > 200:
                name = name[:200] + ' ...'
            notes = []
            for icon in entry.contents[1].find_all('img'):
                notes.append(icon['title'])
            price_text = entry.contents[2].text
            canteen.addMeal(date, category, name, notes,
                            price_regex.findall(price_text), roles)
Exemplo n.º 10
0
def parse_week(url, canteen, type):
    """Parse the plan at ``url`` and add meals of the given ``type`` to ``canteen``.

    Only ``table.swbs_speiseplan`` elements whose ``th.swbs_speiseplan_head``
    caption contains ``type`` are used; every row with cells becomes a meal.
    """
    page = parse(urlopen(url).read())
    for table in page.find_all('table', 'swbs_speiseplan'):
        caption = table.find('th', 'swbs_speiseplan_head').text
        if type not in caption:
            continue
        date = extractDate(caption)
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            if not cells:  # e.g. headline rows
                continue
            category = cells[0].text.strip()
            name = cells[1].text
            has_vital_link = cells[1].find('a', href='http://www.stw-on.de/mensavital')
            notes = ['MensaVital'] if has_vital_link else []
            prices = {
                'student': cells[2].text,
                'employee': cells[3].text,
                'other': cells[4].text,
            }
            canteen.addMeal(date, category, name, notes, prices)
Exemplo n.º 11
0
def parse_dish(dish, canteen):
    """Add the meal described by the ``dish`` element to ``canteen``.

    The date comes from the ``data-date`` attribute. The function returns
    without adding anything when there is no title element or the name is
    empty after removing the notes pattern.
    """
    date = extractDate(dish['data-date'])

    title_tag = dish.find(class_='neo-menu-single-title')
    if title_tag is None:
        return
    notes = {marker['title'] for marker in title_tag.find_all(name='abbr')}

    raw_name = title_tag.text.strip()
    name = re.sub(notes_regex, '', raw_name)
    if not name:
        return

    # Clean up formatting; the original notes mention multiple whitespace
    # and missing whitespace after commas (patterns are defined elsewhere).
    name = re.sub(whitspace_regex, ' ', name)
    trimmed = name.strip(', ')
    name = re.sub(comma_regex, ', ', trimmed)
    name = re.sub(bracket_regex, ' (', name)

    category_tag = dish.find(class_='neo-menu-single-type')
    if category_tag is not None:
        category = category_tag.text
    else:
        previous_heading = dish.find_previous(name='h2')
        if previous_heading is None:
            # Fallback when neither a type nor a heading exists.
            category = 'Unbekannt'
        else:
            # No type element: treated as a side dish under that heading.
            category = 'Beilagen: ' + previous_heading.text.capitalize()

    price_tag = dish.find(class_='neo-menu-single-price')
    if price_tag is None:
        prices = {}
    else:
        prices = price_regex.findall(price_tag.text)

    canteen.addMeal(date, category, name, notes, prices, roles)
Exemplo n.º 12
0
 def test_unknown_date_format(self):
     """extractDate must raise ValueError for the string '2050.11-24'."""
     malformed = '2050.11-24'
     with pytest.raises(ValueError):
         extractDate(malformed)
Exemplo n.º 13
0
 def test_d_mm_yyyy(self):
     """'7.03.2013' must parse to the fixture date."""
     parsed = extractDate('7.03.2013')
     assert parsed == self.date
Exemplo n.º 14
0
 def test_dd_mm_yy(self):
     """'07.03.13' must parse to the fixture date."""
     parsed = extractDate('07.03.13')
     assert parsed == self.date
Exemplo n.º 15
0
 def test_passing_of_date_objects(self):
     """Passing the fixture date object must return that very object."""
     given = self.date
     assert extractDate(given) is given
Exemplo n.º 16
0
 def test_yy_m_d(self):
     """'13-3-7' must parse to the fixture date."""
     parsed = extractDate('13-3-7')
     assert parsed == self.date
Exemplo n.º 17
0
 def test_yy_mm_dd(self):
     """'13-03-07' must parse to the fixture date."""
     parsed = extractDate('13-03-07')
     assert parsed == self.date
Exemplo n.º 18
0
 def test_yyyy_m_dd(self):
     """'2013-3-07' must parse to the fixture date."""
     parsed = extractDate('2013-3-07')
     assert parsed == self.date
Exemplo n.º 19
0
 def test_yyyy_mm_d(self):
     """'2013-03-7' must parse to the fixture date."""
     parsed = extractDate('2013-03-7')
     assert parsed == self.date
Exemplo n.º 20
0
 def test_d_m_yy(self):
     """'7.3.13' must parse to the fixture date."""
     parsed = extractDate('7.3.13')
     assert parsed == self.date
Exemplo n.º 21
0
 def test_unknown_month(self):
     """extractDate must raise ValueError for '07. Hans 2013'."""
     bad_month = '07. Hans 2013'
     with pytest.raises(ValueError):
         extractDate(bad_month)
Exemplo n.º 22
0
 def test_dd_DENAME_yyyy(self):
     """Day, German month name and four-digit year parse to the fixture date."""
     samples = (
         '07 März 2013',
         '07 Maerz 2013',
         '07März 2013',
         '07Maerz 2013',
     )
     for text in samples:
         assert extractDate(text) == self.date
Exemplo n.º 23
0
 def test_ddDOT_ENNAME_yy(self):
     """Day with dot, English month name and two-digit year parse to the fixture date."""
     samples = (
         '07. March 13',
         '07. march 13',
         '07.March 13',
         '07.march 13',
     )
     for text in samples:
         assert extractDate(text) == self.date
Exemplo n.º 24
0
 def test_dd_m_yyyy(self):
     """'07.3.2013' must parse to the fixture date."""
     parsed = extractDate('07.3.2013')
     assert parsed == self.date
Exemplo n.º 25
0
 def test_dd_ENNAME_yy(self):
     """Day, English month name and two-digit year parse to the fixture date."""
     samples = (
         '07 March 13',
         '07 march 13',
         '07March 13',
         '07march 13',
     )
     for text in samples:
         assert extractDate(text) == self.date
Exemplo n.º 26
0
 def test_ddDOT_DENAME_yy(self):
     """Day with dot, German month name and two-digit year parse to the fixture date."""
     samples = (
         '07. März 13',
         '07. Maerz 13',
         '07.März 13',
         '07.Maerz 13',
     )
     for text in samples:
         assert extractDate(text) == self.date