def parse_week(url, date, canteen):
    """Fetch the menu table for the ISO week of `date` and add each meal to `canteen`.

    The page is addressed as <base-url>/<iso-year>/<zero-padded-iso-week>/ and
    is expected to contain a <table id="week-menu"> with one row per category
    and one column per weekday.
    """
    url += '/{0}/{1:0>2}/'.format(*date.isocalendar())
    document = parse(urlopen(url).read())
    week_data = document.find('table', id='week-menu')
    if week_data is None:
        print('week not found')
        return
    # The first header cell carries the week's date range; map it to a
    # per-column list of dates.
    weekDays = extractWeekDates(week_data.thead.find_all('th')[0].text)
    roles = ('student', 'employee', 'other')
    for row in week_data.find_all('tr'):
        category = row.find('th').text
        for day_index, cell in enumerate(row.find_all('td')):
            for dish in cell.find_all('p', 'dish'):
                title = dish.find('strong')
                if not title:
                    # paragraph without a <strong> name is not a meal entry
                    continue
                name = strip_regex.sub(' ', extra_regex.sub('', title.text)).strip()
                if len(name) > 250:
                    # keep names within the feed's length limit
                    name = name[:245] + '...'
                notes = [span['title'] for span in dish.find_all('span', 'tooltip')]
                notes.extend(img['title'] for img in dish.find_all('img'))
                prices = price_regex.findall(dish.find('span', 'price').text)
                canteen.addMeal(weekDays[day_index], category, name,
                                list(set(notes)), prices, roles)
def parse_week(url, canteen, mensa):
    """Parse a weekly menu overview page and add all meals for one cafeteria.

    url     -- page listing the week's menus for all cafeterias
    canteen -- feed builder that receives legend data, additional charges
               and the individual meals
    mensa   -- pattern used to locate this cafeteria's <h1> headline
    """
    document = parse(urlopen(url).read())
    # extra legends information: the text node following the
    # "Kennzeichnung: " label holds the legend; normalize NBSP to space
    # ('\xa0' — presumably what the original literal contained; verify)
    canteen.setLegendData(text=document.find(text='Kennzeichnung: ')
                          .parent.next_sibling.get_text().replace('\xa0', ' '))
    # additional charges for employees/others are stated in free-form
    # paragraphs anywhere on the page
    prices = {}
    for p in document.find_all('p'):
        match = employeePrice.search(p.text)
        if match:
            prices['employee'] = match.group('price')
        match = otherPrice.search(p.text)
        if match:
            prices['other'] = match.group('price')
    if len(prices) != 2:
        # fixed typo: "addtional" -> "additional"
        print('Could not extract additional charges for employee and others')
    canteen.setAdditionalCharges('student', prices)
    # find this mensa's data container: walk the siblings after its headline
    # until we hit the <div class="tx-cagcafeteria-pi1"> element
    # (isinstance instead of `type(x) != Tag` — NavigableStrings are skipped)
    mensa_data = document.find('h1', text=re.compile(mensa)).parent
    while (not isinstance(mensa_data, Tag) or mensa_data.name != 'div'
            or 'tx-cagcafeteria-pi1' not in mensa_data.get('class', [])):
        mensa_data = mensa_data.next_sibling
    weekDays = extractWeekDates(mensa_data.find('h2').text)
    for day_headline in mensa_data.find_all('h3'):
        date = weekDays[day_headline.text]
        # two hops: skip the whitespace text node between headline and table
        day_table = day_headline.next_sibling.next_sibling
        for tr_menu in day_table.tbody.find_all('tr'):
            cells = tr_menu.find_all('td')  # hoisted: was queried per column
            category = cells[0].text.strip()
            name = cells[1].text.replace('\r\n', ' ').strip()
            canteen.addMeal(date, category, name, [], cells[2].text)
def parse_week(url, date, canteen):
    """Parse the week-menu table for `date`'s ISO week and register its meals.

    Builds the week-specific URL from the base `url`, locates the
    <table id="week-menu"> element and walks it row (category) by
    column (weekday), feeding every dish into `canteen`.
    """
    def clean_name(raw):
        # Remove annotation markers, collapse whitespace, cap long names.
        text = strip_regex.sub(' ', extra_regex.sub('', raw)).strip()
        return text[:245] + '...' if len(text) > 250 else text

    url += '/{0}/{1:0>2}/'.format(*date.isocalendar())
    week_data = parse(urlopen(url).read()).find('table', id='week-menu')
    if week_data is None:
        print('week not found')
        return
    # header cell text encodes the week's date range -> one date per column
    weekDays = extractWeekDates(week_data.thead.find_all('th')[0].text)
    for tr in week_data.find_all('tr'):
        category = tr.find('th').text
        for column, td in enumerate(tr.find_all('td')):
            for p in td.find_all('p', 'dish'):
                strong = p.find('strong')
                if not strong:
                    continue  # no <strong> title -> not a dish entry
                notes = {span['title'] for span in p.find_all('span', 'tooltip')}
                notes.update(img['title'] for img in p.find_all('img'))
                canteen.addMeal(
                    weekDays[column], category, clean_name(strong.text),
                    list(notes),
                    price_regex.findall(p.find('span', 'price').text),
                    ('student', 'employee', 'other'))