forked from mswart/openmensa-parsers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hamburg.py
46 lines (40 loc) · 1.82 KB
/
hamburg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!python3
from urllib.request import urlopen
from bs4 import BeautifulSoup as parse
import re
import datetime
from pyopenmensa.feed import LazyBuilder, extractWeekDates
# Patterns are raw strings so that the backslashes reach the regex engine
# unchanged and do not trigger "invalid escape sequence" warnings.

# A parenthesised annotation inside a dish name, matched non-greedily so that
# several separate "(...)" groups are removed individually.
extra_regex = re.compile(r'\(.*?\)')
# A run of two or more whitespace characters (collapsed to one space by users).
strip_regex = re.compile(r'\s{2,}')
# A price such as "1,50" or "1.50", optionally followed by a space and/or "€".
price_regex = re.compile(r'(?P<price>\d+[,.]\d{2}) ?€?')
def parse_week(url, canteen):
    """Fetch one weekly menu page and add every dish on it to ``canteen``.

    The page is expected to contain a ``<table id="week-menu">``. The text of
    the first ``<th>`` in its ``<thead>`` is handed to ``extractWeekDates`` to
    obtain the dates of the week. Every ``<tr>`` of the table is treated as
    one category (named by the row's ``<th>``), and the n-th ``<td>`` of a row
    is matched with the n-th extracted date.

    Args:
        url: Address of the weekly menu page to download.
        canteen: Feed builder receiving the dishes; only ``addMeal`` is used.

    Raises:
        AttributeError: If the table, its ``<thead>`` or a row's ``<th>`` is
            missing from the page.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        document = parse(response.read())
    week_data = document.find('table', id='week-menu')
    week_days = extractWeekDates(week_data.thead.find_all('th')[0].text)
    for category_tr in week_data.find_all('tr'):
        category = category_tr.find('th').text
        for day_index, day_td in enumerate(category_tr.find_all('td')):
            for meal_data in day_td.find_all('p', 'dish'):
                title = meal_data.find('strong')
                if not title:
                    # Entries without a <strong> title carry no dish name.
                    continue
                # Drop "(...)" annotations, then normalise the whitespace.
                name = extra_regex.sub('', title.text)
                name = strip_regex.sub(' ', name).strip()
                if len(name) > 250:
                    name = name[:245] + '...'
                notes = [span['title']
                         for span in meal_data.find_all('span', 'tooltip')]
                notes += [img['title'] for img in meal_data.find_all('img')]
                # De-duplicate while keeping first-seen order, so the
                # generated feed is identical from run to run.
                notes = list(dict.fromkeys(notes))
                # A dish without a price element gets an empty price list
                # instead of aborting the whole week.
                price_span = meal_data.find('span', 'price')
                prices = (price_regex.findall(price_span.text)
                          if price_span else [])
                canteen.addMeal(week_days[day_index], category, name,
                                notes,
                                prices, ('student', 'employee', 'other'))
def parse_url(url, today=False):
    """Build the feed for a canteen by parsing one or two weekly menu pages.

    Each page address is ``url`` followed by ``/<year>/<week>/``, where year
    and week come from ``strftime('/%Y/%W/')`` applied to a date 7 days
    (and, unless ``today`` is true, also 14 days) after the current date.

    NOTE(review): the 7-day shift combined with ``%W`` presumably compensates
    for a different week numbering on the remote site -- confirm before
    changing the offsets.

    Args:
        url: Base address of the canteen's menu pages.
        today: When true, only the first of the two weeks is parsed.

    Returns:
        The result of ``LazyBuilder.toXMLFeed()`` after all pages were parsed.
    """
    feed = LazyBuilder()
    day_offsets = (7,) if today else (7, 14)
    for offset in day_offsets:
        target = datetime.date.today() + datetime.timedelta(days=offset)
        parse_week(url + target.strftime('/%Y/%W/'), feed)
    return feed.toXMLFeed()