Пример #1
0
def parse_week(url, date, canteen):
    """Fetch one week's menu page and add every dish on it to *canteen*.

    The first two fields of ``date.isocalendar()`` (ISO year and ISO week,
    the week zero-padded to two digits) are appended to *url* as
    ``/<year>/<week>/`` before the page is requested.

    Args:
        url: Base URL of the menu; the week path is appended to it.
        date: Object providing ``isocalendar()`` (e.g. ``datetime.date``)
            that selects the week to fetch.
        canteen: Feed builder; receives one ``addMeal`` call per dish.

    Returns:
        None. When the page has no ``<table id="week-menu">`` the function
        prints ``'week not found'`` and returns without adding anything.
    """
    url += '/{0}/{1:0>2}/'.format(*date.isocalendar())
    # Close the HTTP response explicitly instead of leaving it open until
    # garbage collection.
    response = urlopen(url)
    try:
        document = parse(response.read())
    finally:
        response.close()
    week_data = document.find('table', id='week-menu')
    if week_data is None:
        print('week not found')
        return
    # Indexed below by the position of a day cell inside its row.
    week_days = extractWeekDates(week_data.thead.find_all('th')[0].text)
    for category_tr in week_data.find_all('tr'):
        category = category_tr.find('th').text
        for day_index, day_td in enumerate(category_tr.find_all('td')):
            for meal_data in day_td.find_all('p', 'dish'):
                # The dish name lives in <strong>; entries without one are
                # skipped.
                title = meal_data.find('strong')
                if not title:
                    continue
                name = extra_regex.sub('', title.text)
                name = strip_regex.sub(' ', name).strip()
                if len(name) > 250:
                    name = name[:245] + '...'
                notes = [
                    span['title']
                    for span in meal_data.find_all('span', 'tooltip')
                ]
                notes += [img['title'] for img in meal_data.find_all('img')]
                # A dish without a price element is still added, with an
                # empty price list, instead of failing on ``None.text``.
                price_span = meal_data.find('span', 'price')
                if price_span is not None:
                    prices = price_regex.findall(price_span.text)
                else:
                    prices = []
                # sorted() removes duplicates' arbitrary set ordering so the
                # generated notes are stable between runs.
                canteen.addMeal(week_days[day_index], category, name,
                                sorted(set(notes)),
                                prices, ('student', 'employee', 'other'))
Пример #2
0
def parse_week(url, canteen, mensa):
    """Parse the menu page at *url* and add the meals of one mensa to *canteen*.

    Besides the meals, the legend text and the additional charges found on
    the page are passed to *canteen* via ``setLegendData`` and
    ``setAdditionalCharges``.

    Args:
        url: Address of the page to download.
        canteen: Feed builder that receives the legend, the additional
            charges and one ``addMeal`` call per menu row.
        mensa: Pattern (compiled with ``re.compile``) matched against the
            text of the ``<h1>`` heading that introduces the wanted mensa.

    Returns:
        None.

    NOTE(review): a page that lacks the expected elements (legend label,
    matching ``<h1>``, menu container) is not handled; the attribute access
    on a missing element raises, as it did before this revision.
    """
    # Close the HTTP response explicitly instead of leaving it open until
    # garbage collection.
    response = urlopen(url)
    try:
        document = parse(response.read())
    finally:
        response.close()

    # extra legends information
    # NOTE(review): if the first replace() argument is a plain space this
    # call is a no-op; it may have been a non-breaking space before the code
    # was copied -- confirm against the upstream file.
    legend_label = document.find(text='Kennzeichnung: ')
    legend_text = legend_label.parent.next_sibling.get_text().replace(' ', ' ')
    canteen.setLegendData(text=legend_text)

    # additional charges
    prices = {}
    charge_patterns = (('employee', employeePrice), ('other', otherPrice))
    for p in document.find_all('p'):
        paragraph_text = p.text
        for role, pattern in charge_patterns:
            match = pattern.search(paragraph_text)
            if match:
                prices[role] = match.group('price')
    if len(prices) != 2:
        print('Could not extract addtional charges for employee and others')
    canteen.setAdditionalCharges('student', prices)

    # find the menu container: start at the parent of the mensa heading and
    # walk its following siblings until the <div> with the expected class.
    mensa_data = document.find('h1', text=re.compile(mensa)).parent
    while not (isinstance(mensa_data, Tag)
               and mensa_data.name == 'div'
               and 'tx-cagcafeteria-pi1' in mensa_data.get('class', [])):
        mensa_data = mensa_data.next_sibling

    # Looked up below by the text of each <h3> day headline.
    week_days = extractWeekDates(mensa_data.find('h2').text)
    for day_headline in mensa_data.find_all('h3'):
        date = week_days[day_headline.text]
        # The day's table is the second sibling after the headline
        # (presumably a whitespace text node sits in between -- confirm).
        day_table = day_headline.next_sibling.next_sibling
        for tr_menu in day_table.tbody.find_all('tr'):
            # Columns: 0 = category, 1 = meal name, 2 = price text.
            cells = tr_menu.find_all('td')
            category = cells[0].text.strip()
            name = cells[1].text.replace('\r\n', ' ').strip()
            canteen.addMeal(date, category, name, [], cells[2].text)
Пример #3
0
def parse_week(url, date, canteen):
    """Download the menu of the week containing *date* and feed it to *canteen*.

    The request URL is *url* followed by ``/<year>/<week>/``, built from the
    first two values of ``date.isocalendar()`` with the week number
    zero-padded to two digits.

    Args:
        url: Base URL of the menu; the week path is appended to it.
        date: Object providing ``isocalendar()`` (e.g. ``datetime.date``)
            that selects the week to fetch.
        canteen: Feed builder; ``addMeal`` is called once for every dish.

    Returns:
        None. Prints ``'week not found'`` and returns early when the page
        contains no ``<table id="week-menu">``.
    """
    url += '/{0}/{1:0>2}/'.format(*date.isocalendar())
    # Make sure the HTTP response is closed even if reading/parsing fails.
    response = urlopen(url)
    try:
        document = parse(response.read())
    finally:
        response.close()
    week_data = document.find('table', id='week-menu')
    if week_data is None:
        print('week not found')
        return
    # Looked up below by the column position of each day cell.
    week_days = extractWeekDates(week_data.thead.find_all('th')[0].text)
    for category_tr in week_data.find_all('tr'):
        category = category_tr.find('th').text
        for column, day_td in enumerate(category_tr.find_all('td')):
            for meal_data in day_td.find_all('p', 'dish'):
                # Entries without a <strong> element carry no dish name.
                strong = meal_data.find('strong')
                if not strong:
                    continue
                name = extra_regex.sub('', strong.text)
                name = strip_regex.sub(' ', name).strip()
                if len(name) > 250:
                    name = name[:245] + '...'
                notes = [span['title'] for span in meal_data.find_all('span', 'tooltip')]
                notes += [img['title'] for img in meal_data.find_all('img')]
                # Tolerate dishes that have no price element: add them with
                # an empty price list rather than failing on ``None.text``.
                price_span = meal_data.find('span', 'price')
                if price_span is not None:
                    prices = price_regex.findall(price_span.text)
                else:
                    prices = []
                # De-duplicate the notes; sorting keeps their order stable
                # from run to run (plain set order is arbitrary).
                canteen.addMeal(week_days[column], category, name,
                                sorted(set(notes)),
                                prices, ('student', 'employee', 'other')
                                )