continue if len(tr) != 2: continue # no meal strings = list(tr.contents[0].strings) name = strings[0] # prices: prices = strings[-1].split('|') if '-' in map(lambda v: v.strip(), prices): prices = {} # notes: notes = [] for img in tr.contents[1].find_all('img'): notes.append(img['alt'].replace('Symbol', '').strip()) for extra in list(set(map(lambda v: int(v), extra_regex.findall(tr.text)))): if extra in extraLegend: notes.append(extraLegend[extra]) canteen.addMeal(date, 'Hauptgerichte', name, notes, prices, roles if prices else None) return canteen.toXMLFeed() parser = Parser('magdeburg', handler=parse_url, shared_prefix='http://www.studentenwerk-magdeburg.de/') parser.define('ovgu-unten', suffix='mensen-cafeterien/mensa-unicampus/speiseplan-unten/') parser.define('ovgu-oben', suffix='mensen-cafeterien/mensa-unicampus/speiseplan-oben/') parser.define('herrenkrug', suffix='mensen-cafeterien/mensa-herrenkrug/speiseplan/') parser.define('kellercafe', suffix='mensen-cafeterien/mensa-kellercafe/speiseplan/') parser.define('stendal', suffix='mensen-cafeterien/mensa-stendal/speiseplan/') parser.define('halberstadt', suffix='mensen-cafeterien/mensa-halberstadt/speiseplan/') parser.define('wernigerode', suffix='mensen-cafeterien/mensa-wernigerode/speiseplan/')
canteen.addMeal(wdate, category, description, notes=supplies, prices={ 'student': student_price, 'employee': staff_price, 'other': guest_price }) parser = Parser('dortmund', handler=parse_url, shared_prefix='https://www.stwdo.de/mensa-co/') parser.define('tu-hauptmensa', suffix='tu-dortmund/hauptmensa/') parser.define('tu-mensa-sued', suffix='tu-dortmund/mensa-sued/') parser.define('tu-vital', suffix='tu-dortmund/vital/') parser.define('tu-archeteria', suffix='tu-dortmund/archeteria/') parser.define('tu-calla', suffix='tu-dortmund/restaurant-calla/') parser.define('tu-food-fakultaet', suffix='tu-dortmund/food-fakultaet/') parser.define('fh-mensa-max-ophuels-platz', suffix='fh-dortmund/max-ophuels-platz/') parser.define('fh-mensa-sonnenstrasse', suffix='fh-dortmund/sonnenstrasse/') parser.define('fh-kostbar', suffix='fh-dortmund/mensa-kostbar/') parser.define('ism-mensa', suffix='ism/mensa-der-ism/') parser.define('fernuni-mensa', suffix='hagen') parser.define('fsw-snackit', suffix='fh-suedwestfalen/hagen/') parser.define('fsw-canape', suffix='fh-suedwestfalen/iserlohn/') parser.define('fsw-davinci', suffix='fh-suedwestfalen/meschede/') parser.define('fsw-mensa', suffix='fh-suedwestfalen/soest/')
base_data = {} base_data['roles'] = {} base_data['notices'] = data['notices'] for loc in data['locations']: if loc not in LOCATIONS: # Found an unknown location # Please consider updating the parser! sys.stderr.write( 'Unknown location: %s (displayName: %s)\n' % (loc, data['locations'][loc]['displayName'])) for role in data['priceTiers']: if role not in ROLES: # Found an unknown price tier # Please consider updating the parser! sys.stderr.write( 'Unknown price tier: %s (displayName: %s)\n' % (role, data['priceTiers'][role])) else: base_data['roles'][role] = ROLES[role] return base_data parser = Parser('saarland', handler=parse_url, shared_prefix=URL_BASE + URL_MENU) for loc in LOCATIONS: parser.define(loc, suffix=loc)
data = json.loads(response.read().decode()) base_data = {} base_data['roles'] = {} base_data['notices'] = data['notices'] for loc in data['locations']: if loc not in LOCATIONS: # Found an unknown location # Please consider updating the parser! sys.stderr.write('Unknown location: %s (displayName: %s)\n' % (loc, data['locations'][loc]['displayName'])) for role in data['priceTiers']: if role not in ROLES: # Found an unknown price tier # Please consider updating the parser! sys.stderr.write('Unknown price tier: %s (displayName: %s)\n' % (role, data['priceTiers'][role])) else: base_data['roles'][role] = ROLES[role] return base_data parser = Parser('saarland', handler=parse_url, shared_prefix=URL_BASE + URL_MENU) for loc in LOCATIONS: parser.define(loc, suffix=loc)
while type(mensa_data) != Tag or mensa_data.name != 'div'\ or 'tx-cagcafeteria-pi1' not in mensa_data.get('class', []): mensa_data = mensa_data.next_sibling weekDays = extractWeekDates(mensa_data.find('h2').text) for day_headline in mensa_data.find_all('h3'): date = weekDays[day_headline.text] day_table = day_headline.next_sibling.next_sibling for tr_menu in day_table.tbody.find_all('tr'): category = tr_menu.find_all('td')[0].text.strip() name = tr_menu.find_all('td')[1].text.replace('\r\n', ' ').strip() canteen.addMeal(date, category, name, [], tr_menu.find_all('td')[2].text) def parse_url(url, mensa, *weeks, today): canteen = LazyBuilder() for week in weeks: parse_week(url + week, canteen, mensa) if today: break return canteen.toXMLFeed() parser = Parser('marburg', handler=parse_url, shared_args=['http://www.studentenwerk-marburg.de/essen-trinken/speiseplan/']) parser.define('bistro', args=['Speiseplan.*Bistro', 'diese-woche-bistro.html', 'naechste-woche-bistro.html']) parser.define('mos-diner', args=['Speiseplan.*Diner', 'diese-woche-mos-diner.html']) parser.define('erlenring', args=['Mensa Erlenring', 'diese-woche-mensa-erlenring-und-lahnberge.html', 'naechste-woche-mensa-erlenring-und-lahnberge.html']) parser.define('lahnberge', args=['Mensa Lahnberge', 'diese-woche-mensa-erlenring-und-lahnberge.html', 'naechste-woche-mensa-erlenring-und-lahnberge.html'])
canteen.addMeal(mdate, category, mname, mnotes, prices, roles) except ValueError as e: print('could not add meal {}/{} "{}" due to "{}"'.format( mdate, category, mname, e), file=sys.stderr) # empty meal ... pass # return xml data return canteen.toXMLFeed() parser = Parser('niederbayern_oberpfalz', handler=parse_url, shared_prefix='http://www.stwno.de/infomax/daten-extern/csv/') parser.define('th-deggendorf', suffix='HS-DEG') parser.define('hs-landshut', suffix='HS-LA') parser.define('wz-straubing', suffix='HS-SR') parser.define('uni-passau', suffix='UNI-P') parser.define('unip-cafeteria-nikolakloster', suffix='Cafeteria-Nikolakloster') parser.define('oth-regensburg', suffix='HS-R-tag') parser.define('oth-regensburg-abends', suffix='HS-R-abend') parser.define('othr-cafeteria-pruefening', suffix='Cafeteria-Pruefening') parser.define('uni-regensburg', suffix='UNI-R') parser.define('unir-cafeteria-pt', suffix='Cafeteria-PT') parser.define('unir-cafeteria-chemie', suffix='Cafeteria-Chemie') parser.define('unir-cafeteria-milchbar', suffix='Cafeteria-Milchbar') parser.define('unir-cafeteria-sammelgebaeude', suffix='Cafeteria-Sammelgebaeude') parser.define('unir-cafeteria-sport', suffix='Cafeteria-Sport')
day = datetime.date.today() emptyCount = 0 totalCount = 0 while emptyCount < 7 and totalCount < 32: if not parse_day( canteen, '{}&tag={}&monat={}&jahr={}'.format( url, day.day, day.month, day.year), day.strftime('%Y-%m-%d')): emptyCount += 1 else: emptyCount = 0 if today: break totalCount += 1 day += datetime.date.resolution return canteen.toXMLFeed() parser = Parser( 'chemnitz_zwickau', handler=parse_url, shared_prefix='http://www.swcz.de/bilderspeiseplan/xml.php?plan=') parser.define('mensa-reichenhainer-strasse', suffix='1479835489') parser.define('mensa-strasse-der-nationen', suffix='773823070') parser.define('mensa-ring', suffix='4') parser.define('mensa-scheffelberg', suffix='3') parser.define('cafeteria-reichenhainer-strasse', suffix='7') parser.define('cafeteria-strasse-der-nationen', suffix='6') parser.define('cafeteria-ring', suffix='5') parser.define('cafeteria-scheffelberg', suffix='8')
fromDate += datetime.date.resolution continue if len(tr) != 3: continue # no meal strings = list(tr.contents[0].strings) name = strings[0] # prices: prices = strings[-1].split('|') if '-' in map(lambda v: v.strip(), prices): prices = {} # notes: notes = [] for img in tr.contents[1].find_all('img'): notes.append(img['alt'].replace('Symbol', '').strip()) for extra in list(set(map(lambda v: int(v), extra_regex.findall(tr.text)))): if extra in extraLegend: notes.append(extraLegend[extra]) canteen.addMeal(date, 'Hauptgerichte', name, notes, prices, roles if prices else None) return canteen.toXMLFeed() parser = Parser('magdeburg', handler=parse_url, shared_prefix='http://www.studentenwerk-magdeburg.de/') parser.define('ovgu-unten', suffix='mensa-unicampus/speiseplan-unten/') parser.define('ovgu-oben', suffix='mensa-unicampus/speiseplan-oben/') parser.define('herrenkrug', suffix='mensa-herrenkrug/speiseplan/') parser.define('stendal', suffix='mensa-stendal/speiseplan/') parser.define('halberstadt', suffix='mensa-halberstadt/speiseplan/') parser.define('wernigerode', suffix='mensa-wernigerode/speiseplan/')
for img in meal_tr.contents[1].find_all("img"): notes.append(img["title"]) canteen.addMeal(date, category, name, notes, price_regex.findall(meal_tr.contents[2].text), roles) def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url + ".html", canteen) if not today: parse_week(url + "-w1.html", canteen) parse_week(url + "-w2.html", canteen) return canteen.toXMLFeed() parser = Parser("dresden", handler=parse_url, shared_prefix="http://www.studentenwerk-dresden.de/mensen/speiseplan/") parser.define("reichenbachstrasse", suffix="mensa-reichenbachstrasse") parser.define("zeltschloesschen", suffix="zeltschloesschen") parser.define("alte-mensa", suffix="alte-mensa") parser.define("mensologie", suffix="mensologie") parser.define("siedepunkt", suffix="mensa-siedepunkt") parser.define("johannstadt", suffix="mensa-johannstadt") parser.define("wueins", suffix="mensa-wueins") parser.define("bruehl", suffix="mensa-bruehl") parser.define("u-boot", suffix="u-boot") parser.define("tellerrandt", suffix="mensa-tellerrandt") parser.define("zittau", suffix="mensa-zittau") parser.define("stimm-gabel", suffix="mensa-stimm-gabel") parser.define("palucca-schule", suffix="mensa-palucca-schule") parser.define("goerlitz", suffix="mensa-goerlitz") parser.define("sport", suffix="mensa-sport") parser.define("kreuzgymnasium", suffix="mensa-kreuzgymnasium")
canteen.addMeal(date, category, name, notes, prices, roles) def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url + '.html?view=list', canteen) if not today: parse_week(url + '-w1.html?view=list', canteen) parse_week(url + '-w2.html?view=list', canteen) return canteen.toXMLFeed() parser = Parser('dresden', handler=parse_url, shared_prefix='https://www.studentenwerk-dresden.de/mensen/speiseplan/') parser.define('dresden-reichenbachstrasse', suffix='mensa-reichenbachstrasse') parser.define('dresden-zeltschloesschen', suffix='zeltschloesschen') parser.define('dresden-alte-mensa', suffix='alte-mensa') parser.define('dresden-mensologie', suffix='mensologie') parser.define('dresden-siedepunkt', suffix='mensa-siedepunkt') parser.define('dresden-johannstadt', suffix='mensa-johannstadt') parser.define('tharandt-tellerrandt', suffix='mensa-tellerrandt') parser.define('dresden-palucca-hochschule', suffix='mensa-palucca-hochschule') parser.define('dresden-wueins', suffix='mensa-wueins') parser.define('dresden-bruehl', suffix='mensa-bruehl') parser.define('dresden-stimm-gabel', suffix='mensa-stimm-gabel') parser.define('dresden-u-boot', suffix='u-boot') parser.define('dresden-mensa-sport', suffix='mensa-sport') parser.define('dresden-cafe-cube', suffix='grill-cube') parser.define('dresden-cafe-mobil', suffix='pasta-mobil') parser.define('zittau-kraatschn', suffix='mensa-kraatschn')
content = urlopen("{}?selWeek={}".format(url, week)).read() document = parse(content, 'lxml') parse_meals_for_canteen(document, canteen, employees_fee, guests_fee, groups, today) if today: break return canteen.toXMLFeed() parser = Parser( 'thueringen', handler=parse_url, shared_prefix='http://www.stw-thueringen.de/deutsch/mensen/einrichtungen/') parser.define('ei-wartenberg', suffix='eisenach/mensa-am-wartenberg-2.html') parser.define('ef-nordhaeuser', suffix='erfurt/mensa-nordhaeuser-strasse.html') parser.define('ef-altonaer', suffix='erfurt/mensa-altonaer-strasse.html') parser.define('ef-schlueterstr', suffix='erfurt/cafeteria-schlueterstrasse.html') parser.define('ef-leipzigerstr', suffix='erfurt/cafeteria-leipziger-strasse.html') parser.define('ge-freundschaft', suffix='gera/mensa-weg-der-freundschaft.html') parser.define('il-ehrenberg', suffix='ilmenau/mensa-ehrenberg.html') parser.define('il-cafeteria', suffix='ilmenau/cafeteria-mensa-ehrenberg.html') parser.define('il-nanoteria', suffix='ilmenau/cafeteria-nanoteria.html') parser.define('il-roentgen', suffix='ilmenau/cafeteria-roentgenbau.html') parser.define('je-zeiss', suffix='jena/mensa-carl-zeiss-promenade.html') parser.define('je-eah', suffix='jena/cafeteria-eah.html') parser.define('je-ernstabbe', suffix='jena/mensa-ernst-abbe-platz.html') parser.define('je-vegeTable', suffix='jena/vegetable.html')
document = parse(data, 'lxml') dish = document.find(class_='neo-menu-single-dishes') if dish is not None: dishes = dish.find_all(name='tr', attrs={"data-canteen": data_canteen}) else: dishes = [] side = document.find(class_='neo-menu-single-modals') if side is not None: dishes = dishes + side.find_all(name='tr', attrs={"data-canteen": data_canteen}) for dish in dishes: parse_dish(dish, canteen) return canteen.toXMLFeed() parser = Parser( 'marburg', handler=parse_url, shared_args=[ 'https://studentenwerk-marburg.de/essen-trinken/speisekarte/' ]) parser.define('bistro', args=[460]) parser.define('mos-diner', args=[420]) parser.define('erlenring', args=[330]) parser.define('lahnberge', args=[340]) parser.define('cafeteria-lahnberge', args=[490])
price_div = meal_article.find('div', 'price') if price_div: for k, v in price_map.items(): price = price_div['data-' + k] if price: prices[v] = price canteen.addMeal(date, category, name, notes, prices) if closed_candidate and not canteen.hasMealsFor(date): canteen.setDayClosed(date) return canteen.toXMLFeed() parser = Parser('wuerzburg', handler=parse_url, shared_prefix='https://www.studentenwerk-wuerzburg.de/essen-trinken/speiseplaene/plan/') parser.define('austrasse', suffix='austrasse-bamberg.html') parser.define('burse', suffix='burse-am-studentenhaus-wuerzburg.html') parser.define('feldkirchenstrasse', suffix='feldkirchenstrasse-bamberg.html') #parser.define('frankenstube', suffix='frankenstube-wuerzburg.html') #parser.define('hubland', suffix='mensa-am-hubland-wuerzburg.html') parser.define('studentenhaus', suffix='mensa-am-studentenhaus.html') parser.define('aschaffenburg', suffix='mensa-aschaffenburg.html') parser.define('augenklinik', suffix='mensa-roentgenring-wuerzburg.html') parser.define('josef-schneider', suffix='mensa-josef-schneider-strasse-wuerzburg.html') parser.define('schweinfurt', suffix='mensa-schweinfurt.html') parser.define('mensateria', suffix='mensateria-campus-hubland-nord-wuerzburg.html') parser.define('philo', suffix='philosophie-wuerzburg.html') parser.define('sprachenzentrum', suffix='interimsmensa-im-sprachenzentrum-ab-9-april-2018.html')
canteen.addMeal(date, category, menuName, notes, prices) else: canteen.setDayClosed(date) # check for further pages nextPageLink = dom.find(id='next_day_link') if nextPageLink == None or today: url = None else: url = 'https://www.stw-rw.de/' + nextPageLink['href'] return canteen.toXMLFeed() def parse_url(url, today=False): splitted = url.split('#') return parsePlan(splitted[0], splitted[1], today) parser = Parser( 'rostock', handler=parse_url, shared_prefix= 'https://www.stw-rw.de/de/mensen-und-cafeterien/speiseplaene.html') parser.define('mensa-sued', suffix='#mensa_id_1') parser.define('campus-cafeteria-einstein', suffix='#mensa_id_13') parser.define('mensa-kantilene', suffix='#mensa_id_15') parser.define('mensa-st-georg-strasse', suffix='#mensa_id_2') parser.define('mensa-multiple-choice', suffix='#mensa_id_14') parser.define('mensa-ulme', suffix='#mensa_id_3') parser.define('campus-mensa-wismar', suffix='#mensa_id_5')
try: canteen.addMeal(date, category=location_tag.string, name=title_tag.string, prices=price_tag.string) except ValueError as e: print('Error adding meal "{}": {}'.format(meal_item, e)) def parse_url(url, today=False): canteen = LazyBuilder() canteen.setAdditionalCharges('student', {}) parse_week(url, canteen) return canteen.toXMLFeed() parser = Parser( 'darmstadt', handler=parse_url, shared_prefix= 'http://studierendenwerkdarmstadt.de/hochschulgastronomie/speisekarten/') parser.define('stadtmitte', suffix='stadtmitte/') parser.define('lichtwiese', suffix='lichtwiese/') parser.define('schoefferstrasse', suffix='schoefferstrasse/') parser.define('dieburg', suffix='dieburg/') parser.define('haardtring', suffix='haardtring/') # for debugging / testing if __name__ == "__main__": print(parser.parse("darmstadt", "stadtmitte", None))
price_tag = meal_item.find("span", {"class": "fmc-item-price"}) try: canteen.addMeal(date, category=location_tag.string, name=title_tag.string, prices=price_tag.string) except ValueError as e: print('Error adding meal "{}": {}'.format(meal_item, e)) def parse_url(url, today=False): canteen = LazyBuilder() canteen.setAdditionalCharges("student", {}) parse_week(url, canteen) return canteen.toXMLFeed() parser = Parser( "darmstadt", handler=parse_url, shared_prefix="http://studierendenwerkdarmstadt.de/hochschulgastronomie/speisekarten/", ) parser.define("stadtmitte", suffix="stadtmitte/") parser.define("lichtwiese", suffix="lichtwiese/") parser.define("schoefferstrasse", suffix="schoefferstrasse/") parser.define("dieburg", suffix="dieburg/") parser.define("haardtring", suffix="haardtring/") # for debugging / testing if __name__ == "__main__": print(parser.parse("darmstadt", "stadtmitte", None))
canteen.addMeal(date, category, name, notes, prices, roles) def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url + '.html?view=list', canteen) if not today: parse_week(url + '-w1.html?view=list', canteen) parse_week(url + '-w2.html?view=list', canteen) return canteen.toXMLFeed() parser = Parser('dresden', handler=parse_url, shared_prefix='https://www.studentenwerk-dresden.de/mensen/speiseplan/') parser.define('reichenbachstrasse', suffix='mensa-reichenbachstrasse') parser.define('zeltschloesschen', suffix='zeltschloesschen') parser.define('alte-mensa', suffix='alte-mensa') parser.define('mensologie', suffix='mensologie') parser.define('siedepunkt', suffix='mensa-siedepunkt') parser.define('johannstadt', suffix='mensa-johannstadt') parser.define('wueins', suffix='mensa-wueins') parser.define('bruehl', suffix='mensa-bruehl') parser.define('u-boot', suffix='u-boot') parser.define('tellerrandt', suffix='mensa-tellerrandt') parser.define('zittau', suffix='mensa-zittau') parser.define('stimm-gabel', suffix='mensa-stimm-gabel') parser.define('palucca-schule', suffix='mensa-palucca-schule') parser.define('goerlitz', suffix='mensa-goerlitz') parser.define('sport', suffix='mensa-sport') parser.define('kreuzgymnasium', suffix='mensa-kreuzgymnasium')
prices, ('student', 'employee', 'other') ) i += 1 def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url, date.today(), canteen) if not today: parse_week(url, date.today() + date.resolution * 7, canteen) return canteen.toXMLFeed() parser = Parser('hamburg', handler=parse_url, shared_prefix='http://speiseplan.studierendenwerk-hamburg.de/de/') parser.define('armgartstrasse', suffix='590') parser.define('bergedorf', suffix='520') parser.define('berliner-tor', suffix='530') parser.define('botanischer-garten', suffix='560') parser.define('bucerius-law-school', suffix='410') parser.define('cafe-mittelweg', suffix='690') parser.define('cafe-cfel', suffix='680') parser.define('cafe-jungiusstrasse', suffix='610') parser.define('cafe-alexanderstrasse', suffix='660') parser.define('campus', suffix='340') parser.define('finkenau', suffix='420') parser.define('geomatikum', suffix='540') parser.define('harburg', suffix='570') parser.define('hcu', suffix='430') parser.define('philosophenturm', suffix='350') parser.define('stellingen', suffix='580')
mtype = row[4] if mtype != '': for i in mtype.split(','): notes.append('ZT' + i) prices = [row[6], row[7], row[8]] mnotes = [] for i in notes: mnotes.append(legend.get(i, legend.get(i[2:], i))) try: canteen.addMeal(mdate, category, mname, mnotes, prices, roles) except ValueError as e: print('could not add meal {}/{} "{}" due to "{}"'.format(mdate, category, mname, e), file=sys.stderr) # empty meal ... pass return canteen.toXMLFeed() parser = Parser('niederbayern_oberpfalz', handler=parse_url, shared_prefix='http://www.stwno.de/infomax/daten-extern/csv/') parser.define('th-deggendorf', suffix='HS-DEG') parser.define('hs-landshut', suffix='HS-LA') parser.define('uni-passau', suffix='UNI-P') parser.define('oth-regensburg', suffix='HS-R-tag') parser.define('uni-regensburg', suffix='UNI-R')
canteen.addMeal(date, category, menuName, notes, prices) else: canteen.setDayClosed(date) # check for further pages nextPageLink = dom.find(id='next_day_link') if nextPageLink == None or today: url = None else: url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href'] return canteen.toXMLFeed() def parse_url(url, today=False): splitted = url.split('#') return parsePlan(splitted[0], splitted[1], today) parser = Parser( 'rostock', handler=parse_url, shared_prefix= 'https://www.stw-rw.de/de/mensen-und-cafeterien/speiseplaene.html') parser.define('mensa-sued', suffix='#mensa_id_1') parser.define('campus-cafeteria-einstein', suffix='#mensa_id_13') parser.define('mensa-st-georg-straße', suffix='#mensa_id_2') parser.define('mensa-multiple-choice', suffix='#mensa_id_14') parser.define('mensa-kleine-ulme', suffix='#mensa_id_3') parser.define('mensa-ulme-69', suffix='#mensa_id_8') parser.define('campus-mensa-wismar', suffix='#mensa_id_5')
allergene=allergene, zusatzstoffe=zusatzstoffe) return canteen.toXMLFeed() parser = Parser('ostniedersachsen', handler=parse_url, shared_prefix='http://www.stw-on.de') sub = parser.sub('braunschweig', shared_prefix='/braunschweig/essen/menus/') sub.define('mensa1-mittag', suffix='mensa-1', extra_args={'canteentype': 'Mittagsmensa'}) sub.define('mensa1-abend', suffix='mensa-1', extra_args={'canteentype': 'Abendmensa'}) sub.define('mensa360', suffix='360', extra_args={'canteentype': 'Pizza', 'this_week': '-2', 'next_week': '-nachste-woche'}) sub.define('mensa2', suffix='mensa-2') sub.define('hbk', suffix='mensa-hbk') parser.define('clausthal', suffix='/clausthal/essen/menus/mensa-clausthal', extra_args={'next_week': '-kommend-woche'}) sub = parser.sub('hildesheim', shared_prefix='/hildesheim/essen/menus/') sub.define('uni', suffix='mensa-uni') sub.define('hohnsen', suffix='mensa-hohnsen') sub.define('luebecker-strasse', suffix='luebecker-strasse', extra_args={'canteentype': 'Mittagsausgabe'}) parser.sub('suderburg').define('campus', suffix='/suderburg/essen/menus/mensa-suderburg') parser.sub('wolfenbuettel').define('ostfalia', suffix='/wolfenbuettel/essen/menus/mensa-ostfalia') parser.sub('holzminden', shared_prefix='/holzminden/essen/menus/') \ .define('hawk', suffix='mensa-hawk', extra_args={'next_week': False}) sub = parser.sub('lueneburg', shared_prefix='/lueneburg/essen/speiseplaene/') sub.define('campus', suffix='mensa-campus') sub.define('rotes-feld', suffix='rotes-feld')
if notematch not in legends: print('unknown legend: {}'.format(notematch)) continue notes.append(legends[notematch]) canteen.addMeal(date, category, name, notes, price_regex.findall(line), roles) def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url + '&wann=2', canteen) if not today: parse_week(url + '&wann=3', canteen) return canteen.toXMLFeed() parser = Parser('hannover', handler=parse_url, shared_prefix='http://www.stwh-portal.de/mensa/index.php?format=txt&wo=') parser.define('hauptmensa', suffix='2') parser.define('hauptmensa-marktstand', suffix='9') parser.define('restaurant-ct', suffix='10') parser.define('contine', suffix='3') parser.define('pzh', suffix='13') parser.define('caballus', suffix='1') parser.define('tiho-tower', suffix='0') parser.define('hmtmh', suffix='8') parser.define('ricklinger-stadtweg', suffix='6') parser.define('kurt-schwitters-forum', suffix='7') parser.define('blumhardtstrasse', suffix='14') parser.define('herrenhausen', suffix='12')
i += 1 def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url, date.today(), canteen) if not today: parse_week(url, date.today() + date.resolution * 7, canteen) return canteen.toXMLFeed() parser = Parser( 'hamburg', handler=parse_url, shared_prefix='http://speiseplan.studierendenwerk-hamburg.de/de/') parser.define('armgartstrasse', suffix='590') parser.define('bergedorf', suffix='520') parser.define('berliner-tor', suffix='530') parser.define('botanischer-garten', suffix='560') parser.define('bucerius-law-school', suffix='410') parser.define('cafe-mittelweg', suffix='690') parser.define('cafe-cfel', suffix='680') parser.define('cafe-jungiusstrasse', suffix='610') parser.define('cafe-alexanderstrasse', suffix='660') parser.define('campus', suffix='340') parser.define('finkenau', suffix='420') parser.define('geomatikum', suffix='540') parser.define('harburg', suffix='570') parser.define('hcu', suffix='430') parser.define('philosophenturm', suffix='350') parser.define('stellingen', suffix='580')
elif group == 'Bed.:': prices['employee'] = price elif group == 'Gast:': prices['other'] = price canteen.addMeal(date, category, menuName, notes, prices) else: canteen.setDayClosed(date) # check for further pages nextPageLink = dom.find(id='next_day_link') if nextPageLink == None or today: url = None else: url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href'] return canteen.toXMLFeed() def parse_url(url, today=False): splitted = url.split('#') return parsePlan(splitted[0], splitted[1], today) parser = Parser('rostock', handler=parse_url, shared_prefix='https://www.studentenwerk-rostock.de/de/mensen/speiseplaene.html') parser.define('mensa-sued', suffix='#mensa_id_1') parser.define('campus-cafeteria-einstein', suffix='#mensa_id_13') parser.define('mensa-st-georg-straße', suffix='#mensa_id_2') parser.define('mensa-multiple-choice', suffix='#mensa_id_14') parser.define('mensa-kleine-ulme', suffix='#mensa_id_3') parser.define('mensa-ulme-69', suffix='#mensa_id_8') parser.define('campus-mensa-wismar', suffix='#mensa_id_5')
# map roles roles = {'Studenten': 'student', 'Bedienstete': 'employee', 'Gäste': 'other'} for item in items: raw_role, price = item.text.split(':') if raw_role in roles: prices[roles[raw_role]] = price return prices # name of canteens is suffix at the same time canteens = ['mensa-universitaetsstrasse-duesseldorf', 'mensa-kamp-lintfort', 'mensa-campus-derendorf', 'mensa-georg-glock-strasse-duesseldorf', 'mensa-obergath-krefeld', 'mensa-frankenring-krefeld', 'mensa-sommerdeich-kleve', 'mensa-rheydter-strasse-moenchengladbach', 'restaurant-bar-campus-vita-duesseldorf', 'essenausgabe-sued-duesseldorf', 'kunstakademie-duesseldorf', 'musikhochschule-duesseldorf'] parser = Parser('duesseldorf', handler=parse_url, shared_prefix='http://www.stw-d.de/gastronomie/speiseplaene/') for canteen in canteens: parser.define(canteen, suffix=canteen)
from bs4 import BeautifulSoup
from pyopenmensa.feed import LazyBuilder
from urllib.request import urlopen

from utils import Parser


def parse_week(url, canteen):
    """Scrape the weekly plan table at *url* and feed every meal into *canteen*."""
    soup = BeautifulSoup(urlopen(url).read(), 'lxml')
    plan_table = soup.find("table", "tabmensaplan")
    # Category labels are invariant per page; look them up once.
    categories = soup.find_all("span", "mvmensa")
    for date_span in plan_table.find_all("span", "tabDate"):
        # The page prints only day/month; the year is appended manually.
        # NOTE(review): "2015" is hard-coded — presumably stale, confirm site format.
        meal_date = date_span.text + "2015"
        for column, cell in enumerate(date_span.parent.parent.find_all("td")):
            # Columns 1-4 carry the meals; column 0 is the date cell.
            if 0 < column < 5:
                canteen.addMeal(meal_date, categories[column - 1].text, cell.text)


def parse_url(url, today):
    """Build the OpenMensa XML feed; the site publishes only the current week."""
    canteen = LazyBuilder()
    if not today:
        parse_week(url, canteen)
    return canteen.toXMLFeed()


parser = Parser('siegen', handler=parse_url,
                shared_prefix='http://studentenwerk.uni-siegen.de/index.php?uid=650&uid2=0')
parser.define('ar', suffix='&cat_show=1')
parser.define('enc', suffix='&cat_show=2')
parser.define('ars-mundi', suffix='&cat_show=3')
parser.define('cafeterien', suffix='&cat_show=4')
title = item.find('title').text description = get_description(title) notes = build_notes_string(title) plist = [ item.find('preis1').text, item.find('preis2').text, item.find('preis3').text ] food_type = get_food_types(item.find('piktogramme').text) canteen.addMeal(date, food_type, description, notes, plist, roles) return canteen.toXMLFeed() parser = Parser( 'erlangen_nuernberg', handler=parse_url, shared_prefix= 'https://www.max-manager.de/daten-extern/sw-erlangen-nuernberg/xml/') parser.define('er-langemarck', suffix='mensa-lmp.xml') parser.define('er-sued', suffix='mensa-sued.xml') parser.define('n-schuett', suffix='mensa-inselschuett.xml') parser.define('n-regens', suffix='mensa-regensburgerstr.xml') parser.define('n-stpaul', suffix='mensateria-st-paul.xml') parser.define('n-mensateria', suffix='mensateria-ohm.xml') parser.define('n-hohfederstr', suffix='cafeteria-come-in.xml') parser.define('n-baerenschanzstr', suffix='cafeteria-baerenschanzstr.xml') parser.define('eichstaett', suffix='mensa-eichstaett.xml') parser.define('ingolstadt', suffix='mensa-ingolstadt.xml') parser.define('ansbach', suffix='mensa-ansbach.xml') parser.define('triesdorf', suffix='mensateria-triesdorf.xml')
def parse_week(url, canteen):
    """Parse one week's menu table from *url* into *canteen*."""
    soup = BeautifulSoup(urlopen(url).read(), 'lxml')
    table = soup.find("table", "tabmensaplan")
    # One label span per meal column, shared across all days of the week.
    labels = soup.find_all("span", "mvmensa")
    for date_tag in table.find_all("span", "tabDate"):
        # Site omits the year; it is appended as a constant string.
        # NOTE(review): hard-coded "2015" looks stale — confirm against the page.
        when = date_tag.text + "2015"
        for col, cell in enumerate(date_tag.parent.parent.find_all("td")):
            if 1 <= col <= 4:  # meal columns only; column 0 is the date
                canteen.addMeal(when, labels[col - 1].text, cell.text)


def parse_url(url, today):
    """Return the OpenMensa XML feed (only the current week is available)."""
    canteen = LazyBuilder()
    if not today:
        parse_week(url, canteen)
    return canteen.toXMLFeed()


parser = Parser(
    'siegen',
    handler=parse_url,
    shared_prefix='http://studentenwerk.uni-siegen.de/index.php?uid=650&uid2=0'
)
parser.define('ar', suffix='&cat_show=1')
parser.define('enc', suffix='&cat_show=2')
parser.define('ars-mundi', suffix='&cat_show=3')
parser.define('cafeterien', suffix='&cat_show=4')
def parse_url(url, place_class=None, today=False):
    """Build the feed: current week, then later weeks until a fetch adds no new day."""
    canteen = OpenMensaCanteen()
    # Current week has no ?kw= parameter.
    parse_week(canteen, url, place_class)
    day = datetime.date.today()
    old = -1
    day += datetime.date.resolution * 7
    if not today:
        week_url = '{}?kw={}'.format(url, day.isocalendar()[1])
        parse_week(canteen, week_url, place_class)
        day += datetime.date.resolution * 7
    # Keep fetching subsequent weeks while each round still grows the day count.
    while not today and old != canteen.dayCount():
        old = canteen.dayCount()
        week_url = '{}?kw={}'.format(url, day.isocalendar()[1])
        parse_week(canteen, week_url, place_class)
        day += datetime.date.resolution * 7
    return canteen.toXMLFeed()


parser = Parser('karlsruhe', handler=parse_url,
                shared_args=['https://www.sw-ka.de/de/essen/'])
parser.define('adenauerring', args=['canteen_place_1'])
parser.define('moltke', args=['canteen_place_2'])
parser.define('erzbergerstrasse', args=['canteen_place_3'])
parser.define('schloss-gottesaue', args=['canteen_place_4'])
parser.define('tiefenbronner-strasse', args=['canteen_place_5'])
parser.define('holzgartenstrasse', args=['canteen_place_6'])
def parse_url(url, data_canteen, today=False):
    """Build the OpenMensa feed for one Marburg canteen.

    Collects every <tr> row tagged with this canteen's ``data-canteen`` id
    from both the main-dishes section and the side-dish (modals) section,
    then hands each row to ``parse_dish``.
    """
    canteen = LazyBuilder()
    markup = urlopen(url).read().decode('utf-8')
    document = parse(markup, 'lxml')

    # main dishes (section may be absent entirely)
    dish_section = document.find(class_='neo-menu-single-dishes')
    rows = [] if dish_section is None else dish_section.find_all(
        name='tr', attrs={"data-canteen": data_canteen})

    # side dishes live in a second, optional section
    side_section = document.find(class_='neo-menu-single-modals')
    if side_section is not None:
        rows = rows + side_section.find_all(
            name='tr', attrs={"data-canteen": data_canteen})

    for row in rows:
        parse_dish(row, canteen)
    return canteen.toXMLFeed()


parser = Parser('marburg',
                handler=parse_url,
                shared_args=['https://studentenwerk-marburg.de/essen-trinken/speisekarte/'])
parser.define('bistro', args=[460])
parser.define('mos-diner', args=[420])
parser.define('erlenring', args=[330])
parser.define('lahnberge', args=[340])
parser.define('cafeteria-lahnberge', args=[490])
# NOTE(review): collapsed leipzig module tail — day-by-day polling loop: stops
# after 7 consecutive empty days or 32 total requests; enclosing def not visible.
day = datetime.date.today() emptyCount = 0 totalCount = 0 while emptyCount < 7 and totalCount < 32: if not parse_day(canteen, '{}&day={}&month={}&year={}&limit=25' .format(url, day.day, day.month, day.year), day.strftime('%Y-%m-%d')): emptyCount += 1 else: emptyCount = 0 if today: break totalCount += 1 day += datetime.date.resolution return canteen.toXMLFeed() parser = Parser('leipzig', handler=parse_url, shared_prefix='http://www.studentenwerk-leipzig.de/mensen-und-cafeterien/speiseplan/m/meals.php?canteen=') parser.define('dittrichring', suffix='153') parser.define('koburger-strasse', suffix='121') parser.define('philipp-rosenthal-strasse', suffix='127') parser.define('waechterstrasse', suffix='129') parser.define('academica', suffix='118') parser.define('am-park', suffix='106') parser.define('am-elsterbecken', suffix='115') parser.define('liebigstrasse', suffix='162') parser.define('peterssteinweg', suffix='111') parser.define('schoenauer-strasse', suffix='140') parser.define('tierklinik', suffix='170')
# NOTE(review): collapsed wuerzburg module tail — extracts up to three role
# prices from data-* attributes; a missing 'default' price clears all prices.
# Enclosing loop/def not visible.
price_div = meal_article.find('div', 'price') if price_div is None: canteen.addMeal(date, category, name, notes) continue prices = {} for v, r in (('default', 'student'), ('bed', 'employee'), ('guest', 'other')): price = price_regex.search(price_div['data-' + v]) if price: prices[r] = price.group('price') elif v == 'default': prices = {} break canteen.addMeal(date, category, name, notes, prices) if closed_candidate and not canteen.hasMealsFor(date): canteen.setDayClosed(date) return canteen.toXMLFeed() parser = Parser('wuerzburg', handler=parse_url, shared_prefix='http://www.studentenwerk-wuerzburg.de/essen-trinken/speiseplaene/plan/show/') parser.define('austrasse', suffix='austrasse-bamberg.html') parser.define('burse', suffix='burse-wuerzburg.html') parser.define('feldkirchenstrasse', suffix='feldkirchenstrasse-bamberg.html') parser.define('frankenstube', suffix='frankenstube-wuerzburg.html') parser.define('hubland', suffix='mensa-am-hubland-wuerzburg.html') parser.define('studentenhaus', suffix='mensa-am-studentenhaus.html') parser.define('aschaffenburg', suffix='mensa-aschaffenburg') parser.define('augenklinik', suffix='mensa-augenklinik-wuerzburg.html') parser.define('josef-schneider', suffix='mensa-josef-schneider-strasse-wuerzburg.html') parser.define('schweinfurt', suffix='mensa-schweinfurt.html')
print(canteen.toXMLFeed()) return canteen.toXMLFeed() parser = Parser("ostniedersachsen", handler=parse_url, shared_prefix="http://www.stw-on.de") sub = parser.sub("braunschweig", shared_prefix="/braunschweig/essen/menus/") sub.define("mensa1-mittag", suffix="mensa-1", extra_args={"canteentype": "Mittagsmensa"}) sub.define("mensa1-abend", suffix="mensa-1", extra_args={"canteentype": "Abendmensa"}) sub.define( "mensa360", suffix="360", extra_args={"canteentype": "Pizza", "this_week": "-2", "next_week": "-nachste-woche"} ) sub.define("mensa2", suffix="mensa-2") sub.define("hbk", suffix="mensa-hbk") parser.define("clausthal", suffix="/clausthal/essen/menus/mensa-clausthal", extra_args={"next_week": "-kommend-woche"}) sub = parser.sub("hildesheim", shared_prefix="/hildesheim/essen/menus/") sub.define("uni", suffix="mensa-uni") sub.define("hohnsen", suffix="mensa-hohnsen") sub.define("luebecker-strasse", suffix="luebecker-strasse", extra_args={"canteentype": "Mittagsausgabe"}) parser.sub("suderburg").define("campus", suffix="/suderburg/essen/menus/mensa-suderburg") parser.sub("wolfenbuettel").define("ostfalia", suffix="/wolfenbuettel/essen/menus/mensa-ostfalia") parser.sub("holzminden", shared_prefix="/holzminden/essen/menus/").define( "hawk", suffix="mensa-hawk", extra_args={"next_week": False} ) sub = parser.sub("lueneburg", shared_prefix="/lueneburg/essen/speiseplaene/") sub.define("campus", suffix="mensa-campus") sub.define("rotes-feld", suffix="rotes-feld")
# NOTE(review): collapsed duesseldorf module tail — maps German role labels to
# OpenMensa roles when splitting "role: price" items, then defines one parser
# entry per canteen slug (slug doubles as URL suffix). Enclosing def not visible.
roles = {'Studenten': 'student', 'Studierende': 'student', 'Bedienstete': 'employee', 'Gäste': 'other'} for item in items: raw_role, price = item.text.split(':') if raw_role in roles: prices[roles[raw_role]] = price return prices # name of canteens is suffix at the same time canteens = ['mensa-universitaetsstrasse-duesseldorf', 'mensa-kamp-lintfort', 'mensa-campus-derendorf', 'mensa-georg-glock-strasse-duesseldorf', 'mensa-obergath-krefeld', 'mensa-frankenring-krefeld', 'mensa-sommerdeich-kleve', 'mensa-rheydter-strasse-moenchengladbach', 'restaurant-bar-campus-vita-duesseldorf', 'essenausgabe-sued-duesseldorf', 'kunstakademie-duesseldorf', 'musikhochschule-duesseldorf'] parser = Parser('duesseldorf', handler=parse_url, shared_prefix='http://www.stw-d.de/gastronomie/speiseplaene/') for canteen in canteens: parser.define(canteen, suffix=canteen)
# NOTE(review): collapsed muenchen module tail — starts mid-expression
# (`.strip()`); adds meals with legend-expanded notes and per-category prices,
# iterating day by day. Suffixes carry a `{}` placeholder, presumably filled
# with a date/week token by parse_url — confirm in the full module.
.strip() canteen.addMeal(date, category, name, [legend.get(n, n) for n in notes], prices.get(price_category, {}) ) date += datetime.timedelta(days=1) if today: break return canteen.toXMLFeed() parser = Parser('muenchen', handler=parse_url, shared_prefix=base+'/speiseplan/') parser.define('leopoldstrasse', suffix='speiseplan_{}_411_-de.html') parser.define('martinsried', suffix='speiseplan_{}_412_-de.html') parser.define('grosshadern', suffix='speiseplan_{}_414_-de.html') parser.define('schellingstrasse', suffix='speiseplan_{}_416_-de.html') parser.define('archisstrasse', suffix='speiseplan_{}_421_-de.html') parser.define('garching', suffix='speiseplan_{}_422_-de.html') parser.define('weihenstephan', suffix='speiseplan_{}_423_-de.html') parser.define('lothstrasse', suffix='speiseplan_{}_431_-de.html') parser.define('pasing', suffix='speiseplan_{}_432_-de.html') parser.define('rosenheim', suffix='speiseplan_{}_441_-de.html') parser.define('adalbertstrasse', suffix='speiseplan_{}_512_-de.html') parser.define('cafeteria-garching', suffix='speiseplan_{}_524_-de.html') parser.define('wst', suffix='speiseplan_{}_525_-de.html') parser.define('akademie', suffix='speiseplan_{}_526_-de.html') parser.define('boltzmannstrasse', suffix='speiseplan_{}_527_-de.html') parser.define('karlstrasse', suffix='speiseplan_{}_532_-de.html')
# NOTE(review): collapsed magdeburg module tail — builds notes from img alt
# texts ('Symbol' stripped) plus numeric extras resolved via extraLegend, then
# adds the meal under the fixed category 'Hauptgerichte'. Enclosing def not
# visible. (HEAD shows an older http:// variant of this same tail.)
prices = {} # notes: notes = [] for img in tr.contents[1].find_all('img'): notes.append(img['alt'].replace('Symbol', '').strip()) for extra in list( set(map(lambda v: int(v), extra_regex.findall(tr.text)))): if extra in extraLegend: notes.append(extraLegend[extra]) canteen.addMeal(date, 'Hauptgerichte', name, notes, prices, roles if prices else None) return canteen.toXMLFeed() parser = Parser('magdeburg', handler=parse_url, shared_prefix='https://www.studentenwerk-magdeburg.de/') parser.define('ovgu-unten', suffix='mensen-cafeterien/mensa-unicampus/speiseplan-unten/') parser.define('ovgu-oben', suffix='mensen-cafeterien/mensa-unicampus/speiseplan-oben/') parser.define('herrenkrug', suffix='mensen-cafeterien/mensa-herrenkrug/speiseplan/') parser.define('kellercafe', suffix='mensen-cafeterien/mensa-kellercafe/speiseplan/') parser.define('stendal', suffix='mensen-cafeterien/mensa-stendal/speiseplan/') parser.define('halberstadt', suffix='mensen-cafeterien/mensa-halberstadt/speiseplan/') parser.define('wernigerode', suffix='mensen-cafeterien/mensa-wernigerode/speiseplan/')
# NOTE(review): collapsed darmstadt module tail — in this mangled single-line
# form the leading '#' comment swallows the rest of the line; in the original
# file this was a comment followed by meal-adding code and parse_url (which
# fetches 'week' and 'nextweek' variants unless today-only). Kept verbatim.
# multiple prices for a meal - keep all of them literally name = mealCellText price = None try: date=dates[dateIdx] canteen.addMeal(date, category=subCanteen, name=name, prices=price) except ValueError as e: print('Error adding meal {} on {}: {}'.format(name, date, e)) def parse_url(url, today): canteen = LazyBuilder() canteen.setAdditionalCharges('student', {}) if today: parse_week(url, canteen) # base url only contains current day else: parse_week(url + 'week', canteen) parse_week(url + 'nextweek', canteen) return canteen.toXMLFeed() parser = Parser('darmstadt', handler=parse_url, shared_prefix='https://www.stwda.de/components/com_spk/') parser.define('stadtmitte', suffix='spk_Stadtmitte_print.php?ansicht=') parser.define('lichtwiese', suffix='spk_Lichtwiese_print.php?ansicht=') parser.define('schoefferstrasse', suffix='spk_Schoefferstrasse_print.php?ansicht=') parser.define('dieburg', suffix='spk_Dieburg_print.php?ansicht=') parser.define('haardtring', suffix='spk_Haardtring_print.php?ansicht=')
# NOTE(review): collapsed niederbayern_oberpfalz (CSV) module tail — prefixes
# meal-type codes with 'ZT', resolves notes through the legend (falling back to
# the code without its 2-char prefix), and tolerates addMeal ValueErrors for
# empty meals. Enclosing loop/def not visible.
for i in mtype.split(','): notes.append('ZT' + i) prices = [row[6], row[7], row[8]] mnotes = [] for i in notes: mnotes.append(legend.get(i, legend.get(i[2:], i))) try: canteen.addMeal(mdate, category, mname, mnotes, prices, roles) except ValueError as e: print('could not add meal {}/{} "{}" due to "{}"'.format( mdate, category, mname, e), file=sys.stderr) # empty meal ... pass return canteen.toXMLFeed() parser = Parser('niederbayern_oberpfalz', handler=parse_url, shared_prefix='http://www.stwno.de/infomax/daten-extern/csv/') parser.define('th-deggendorf', suffix='HS-DEG') parser.define('hs-landshut', suffix='HS-LA') parser.define('uni-passau', suffix='UNI-P') parser.define('oth-regensburg', suffix='HS-R-tag') parser.define('uni-regensburg', suffix='UNI-R')
# NOTE(review): collapsed darmstadt module tail (variant of the chunk above it
# in this file) — adds a meal per sub-canteen column, then parse_url fetches
# 'week'/'nextweek' unless today-only. Enclosing try/loop/def not visible.
date = dates[dateIdx] canteen.addMeal(date, category=subCanteen, name=name, prices=price) except ValueError as e: print('Error adding meal {} on {}: {}'.format(name, date, e)) def parse_url(url, today): canteen = LazyBuilder() canteen.setAdditionalCharges('student', {}) if today: parse_week(url, canteen) # base url only contains current day else: parse_week(url + 'week', canteen) parse_week(url + 'nextweek', canteen) return canteen.toXMLFeed() parser = Parser('darmstadt', handler=parse_url, shared_prefix='https://www.stwda.de/components/com_spk/') parser.define('stadtmitte', suffix='spk_Stadtmitte_print.php?ansicht=') parser.define('lichtwiese', suffix='spk_Lichtwiese_print.php?ansicht=') parser.define('schoefferstrasse', suffix='spk_Schoefferstrasse_print.php?ansicht=') parser.define('dieburg', suffix='spk_Dieburg_print.php?ansicht=') parser.define('haardtring', suffix='spk_Haardtring_print.php?ansicht=')
# NOTE(review): collapsed hannover module tail — resolves legend references per
# line (warning on unknown codes), then parse_url fetches this week (&wann=2)
# and, unless today-only, next week (&wann=3). Enclosing loop/def not visible.
notes = [] for notematch in note_regex.findall(line): if notematch not in legends: print("unknown legend: {}".format(notematch)) continue notes.append(legends[notematch]) canteen.addMeal(date, category, name, notes, price_regex.findall(line), roles) def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url + "&wann=2", canteen) if not today: parse_week(url + "&wann=3", canteen) return canteen.toXMLFeed() parser = Parser("hannover", handler=parse_url, shared_prefix="http://www.stwh-portal.de/mensa/index.php?format=txt&wo=") parser.define("hauptmensa", suffix="2") parser.define("hauptmensa-marktstand", suffix="9") parser.define("restaurant-ct", suffix="10") parser.define("contine", suffix="3") parser.define("pzh", suffix="13") parser.define("caballus", suffix="1") parser.define("tiho-tower", suffix="0") parser.define("hmtmh", suffix="8") parser.define("ricklinger-stadtweg", suffix="6") parser.define("kurt-schwitters-forum", suffix="7") parser.define("blumhardtstrasse", suffix="14") parser.define("herrenhausen", suffix="12")
# NOTE(review): collapsed wuerzburg module tail (variant) — starts mid-tuple of
# a role-mapping loop; same price extraction/closed-day logic as the earlier
# wuerzburg chunk in this file, different Parser call formatting.
('guest', 'other')): price = price_regex.search(price_div['data-' + v]) if price: prices[r] = price.group('price') elif v == 'default': prices = {} break canteen.addMeal(date, category, name, notes, prices) if closed_candidate and not canteen.hasMealsFor(date): canteen.setDayClosed(date) return canteen.toXMLFeed() parser = Parser( 'wuerzburg', handler=parse_url, shared_prefix= 'http://www.studentenwerk-wuerzburg.de/essen-trinken/speiseplaene/plan/show/' ) parser.define('austrasse', suffix='austrasse-bamberg.html') parser.define('burse', suffix='burse-wuerzburg.html') parser.define('feldkirchenstrasse', suffix='feldkirchenstrasse-bamberg.html') parser.define('frankenstube', suffix='frankenstube-wuerzburg.html') parser.define('hubland', suffix='mensa-am-hubland-wuerzburg.html') parser.define('studentenhaus', suffix='mensa-am-studentenhaus.html') parser.define('aschaffenburg', suffix='mensa-aschaffenburg') parser.define('augenklinik', suffix='mensa-augenklinik-wuerzburg.html') parser.define('josef-schneider', suffix='mensa-josef-schneider-strasse-wuerzburg.html') parser.define('schweinfurt', suffix='mensa-schweinfurt.html')
# NOTE(review): collapsed aachen module tail — in this mangled single-line form
# the leading '# todo' comment swallows the rest of the line; the original had
# an additional-charges setup, legend parsing via regex from the #additives
# element, and a per-weekday-div parse loop ('montag'..'freitagNaechste').
# Kept verbatim.
# todo only for: Tellergericht, vegetarisch, Klassiker, Empfehlung des Tages: canteen.setAdditionalCharges('student', {'other': 1.5}) document = parse(urlopen(url).read()) global legend regex = '(?P<name>(\d|[A-Z])+)\)\s*' + \ '(?P<value>\w+((\s+\w+)*[^0-9)]))' legend = buildLegend(legend, document.find(id='additives').text, regex=regex) days = ('montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag', 'montagNaechste', 'dienstagNaechste', 'mittwochNaechste', 'donnerstagNaechste', 'freitagNaechste') for day in days: data = document.find('div', id=day) headline = document.find('a', attrs={'data-anchor': '#' + day}) parse_day(canteen, headline.text, data) return canteen.toXMLFeed() parser = Parser('aachen', handler=parse_url, shared_prefix='http://www.studentenwerk-aachen.de/speiseplaene/') parser.define('academica', suffix='academica-w.html') parser.define('ahorn', suffix='ahornstrasse-w.html') parser.define('templergraben', suffix='templergraben-w.html') parser.define('bayernallee', suffix='bayernallee-w.html') parser.define('eups', suffix='eupenerstrasse-w.html') parser.define('goethe', suffix='goethestrasse-w.html') parser.define('vita', suffix='vita-w.html') parser.define('zeltmensa', suffix='forum-w.html') parser.define('juelich', suffix='juelich-w.html')
# NOTE(review): collapsed niederbayern_oberpfalz tail (extended variant with
# cafeteria/evening canteens) — leading '#' swallows the rest in this mangled
# form; original wrapped addMeal in try/except for empty meals. Kept verbatim.
# Try to add the meal try: canteen.addMeal( mdate, category, mname, mnotes, prices, roles) except ValueError as e: print('could not add meal {}/{} "{}" due to "{}"'.format(mdate, category, mname, e), file=sys.stderr) # empty meal ... pass # return xml data return canteen.toXMLFeed() parser = Parser('niederbayern_oberpfalz', handler=parse_url, shared_prefix='http://www.stwno.de/infomax/daten-extern/csv/') parser.define('th-deggendorf', suffix='HS-DEG') parser.define('hs-landshut', suffix='HS-LA') parser.define('wz-straubing', suffix='HS-SR') parser.define('uni-passau', suffix='UNI-P') parser.define('unip-cafeteria-nikolakloster', suffix='Cafeteria-Nikolakloster') parser.define('oth-regensburg', suffix='HS-R-tag') parser.define('oth-regensburg-abends', suffix='HS-R-abend') parser.define('othr-cafeteria-pruefening', suffix='Cafeteria-Pruefening') parser.define('uni-regensburg', suffix='UNI-R') parser.define('unir-cafeteria-pt', suffix='Cafeteria-PT') parser.define('unir-cafeteria-chemie', suffix='Cafeteria-Chemie') parser.define('unir-cafeteria-milchbar', suffix='Cafeteria-Milchbar') parser.define('unir-cafeteria-sammelgebaeude', suffix='Cafeteria-Sammelgebaeude') parser.define('unir-cafeteria-sport', suffix='Cafeteria-Sport')
# NOTE(review): collapsed chemnitz_zwickau module tail — day-by-day polling via
# &tag/&monat/&jahr query params, stopping after 7 empty days or 32 requests;
# legend is shared module state. Enclosing def not visible.
global legend canteen = LazyBuilder() canteen.setLegendData(legend) day = datetime.date.today() emptyCount = 0 totalCount = 0 while emptyCount < 7 and totalCount < 32: if not parse_day(canteen, '{}&tag={}&monat={}&jahr={}' .format(url, day.day, day.month, day.year), day.strftime('%Y-%m-%d')): emptyCount += 1 else: emptyCount = 0 if today: break totalCount += 1 day += datetime.date.resolution return canteen.toXMLFeed() parser = Parser('chemnitz_zwickau', handler=parse_url, shared_prefix='http://www.swcz.de/bilderspeiseplan/xml.php?plan=') parser.define('mensa-reichenhainer-strasse', suffix='1479835489') parser.define('mensa-strasse-der-nationen', suffix='773823070') parser.define('mensa-ring', suffix='4') parser.define('mensa-scheffelberg', suffix='3') parser.define('cafeteria-reichenhainer-strasse', suffix='7') parser.define('cafeteria-strasse-der-nationen', suffix='6') parser.define('cafeteria-ring', suffix='5') parser.define('cafeteria-scheffelberg', suffix='8')
# NOTE(review): collapsed erlangen_nuernberg HTML-scraper tail — per-row meal
# extraction from table cells with a broad except that prints the traceback;
# enclosing try/loop/def not visible. ('foot' in helper names looks like a typo
# for 'food' carried through the whole module.)
food_type = parse_foot_type(tds[2]) food_description = get_foot_description(tds[3]) notes_string = build_notes_string(tds[3]) if (notes_string != ""): notes.append(notes_string) prices = get_pricing(tds, 4, 7) canteen.addMeal(date, food_type, food_description, notes, prices, roles if prices else None) except Exception as e: traceback.print_exception(*sys.exc_info()) return canteen.toXMLFeed() parser = Parser( 'erlangen_nuernberg', handler=parse_url, shared_prefix='http://www.studentenwerk.uni-erlangen.de/verpflegung/de/') parser.define('er-langemarck', suffix='sp-er-langemarck.shtml') parser.define('er-sued', suffix='sp-er-sued.shtml') parser.define('n-schuett', suffix='sp-n-schuett.shtml') parser.define('n-regens', suffix='sp-n-regens.shtml') parser.define('n-stpaul', suffix='sp-n-stpaul.shtml') parser.define('n-mensateria', suffix='sp-n-mensateria.shtml') parser.define('n-hohfederstr', suffix='sp-n-hohfederstr.shtml') parser.define('n-baerenschanzstr', suffix='sp-n-baerenschanzstr.shtml') parser.define('eichstaett', suffix='sp-eichstaett.shtml') parser.define('ingolstadt', suffix='sp-ingolstadt.shtml') parser.define('ansbach', suffix='sp-ansbach.shtml') parser.define('triesdorf', suffix='sp-triesdorf.shtml')
# NOTE(review): collapsed dortmund module tail — classifies per-item spans into
# supplies (notes) and student/staff/guest prices, adding the meal only when a
# description was found. Enclosing loop/def not visible. (HEAD shows the same
# addMeal call reformatted.)
if supply['title']: supplies.append(supply['title']) elif 'price'in item['class']: price = item.text if 'student' in item['class']: student_price = getAndFormatPrice(price) elif 'staff' in item['class']: staff_price = getAndFormatPrice(price) elif 'guest' in item['class']: guest_price = getAndFormatPrice(price) if description != "": canteen.addMeal(wdate, category, description, notes=supplies, prices={'student': student_price, 'employee': staff_price, 'other': guest_price}) parser = Parser('dortmund', handler=parse_url, shared_prefix='https://www.stwdo.de/mensa-co/') parser.define('tu-hauptmensa', suffix='tu-dortmund/hauptmensa/') parser.define('tu-mensa-sued', suffix='tu-dortmund/mensa-sued/') parser.define('tu-vital', suffix='tu-dortmund/vital/') parser.define('tu-archeteria', suffix='tu-dortmund/archeteria/') parser.define('tu-calla', suffix='tu-dortmund/restaurant-calla/') parser.define('tu-food-fakultaet', suffix='tu-dortmund/food-fakultaet/') parser.define('fh-mensa-max-ophuels-platz', suffix='fh-dortmund/max-ophuels-platz/') parser.define('fh-mensa-sonnenstrasse', suffix='fh-dortmund/sonnenstrasse/') parser.define('fh-kostbar', suffix='fh-dortmund/mensa-kostbar/') parser.define('ism-mensa', suffix='ism/mensa-der-ism/') parser.define('fernuni-mensa', suffix='hagen') parser.define('fsw-snackit', suffix='fh-suedwestfalen/hagen/') parser.define('fsw-canape', suffix='fh-suedwestfalen/iserlohn/') parser.define('fsw-davinci', suffix='fh-suedwestfalen/meschede/') parser.define('fsw-mensa', suffix='fh-suedwestfalen/soest/')
# NOTE(review): collapsed ostniedersachsen sub-parser definitions (single-quote
# variant of the chunk above in this file) — starts mid-call with the
# extra_args of a preceding sub.define; remainder defines Braunschweig,
# Clausthal, Hildesheim, Suderburg, Wolfenbüttel and Holzminden canteens.
extra_args={'canteentype': 'Mittagsmensa'}) sub.define('mensa1-abend', suffix='mensa-1', extra_args={'canteentype': 'Abendmensa'}) sub.define('mensa360', suffix='360', extra_args={ 'canteentype': 'Pizza', 'this_week': '-2', 'next_week': '-nachste-woche' }) sub.define('mensa2', suffix='mensa-2') sub.define('hbk', suffix='mensa-hbk') parser.define('clausthal', suffix='/clausthal/essen/menus/mensa-clausthal', extra_args={'next_week': '-kommend-woche'}) sub = parser.sub('hildesheim', shared_prefix='/hildesheim/essen/menus/') sub.define('uni', suffix='mensa-uni') sub.define('hohnsen', suffix='mensa-hohnsen') sub.define('luebecker-strasse', suffix='luebecker-strasse', extra_args={'canteentype': 'Mittagsausgabe'}) parser.sub('suderburg').define('campus', suffix='/suderburg/essen/menus/mensa-suderburg') parser.sub('wolfenbuettel').define( 'ostfalia', suffix='/wolfenbuettel/essen/menus/mensa-ostfalia') parser.sub('holzminden', shared_prefix='/holzminden/essen/menus/') \ .define('hawk', suffix='mensa-hawk', extra_args={'next_week': False})
# NOTE(review): collapsed erlangen_nuernberg XML tail (variant) — an early
# return (presumably from a prior branch not visible here), then an
# ElementTree walk over day/item nodes building meals from timestamped days.
return canteen.toXMLFeed() root = ET.fromstring(xml_data) for day in root: date = time.strftime('%d.%m.%Y', time.localtime(int(day.get('timestamp')))) for item in day: title = item.find('title').text description = get_description(title) notes = build_notes_string(title) plist = [item.find('preis1').text, item.find('preis2').text, item.find('preis3').text] food_type = get_food_types(item.find('piktogramme').text) canteen.addMeal(date, food_type, description, notes, plist, roles) return canteen.toXMLFeed() parser = Parser('erlangen_nuernberg', handler=parse_url, shared_prefix='https://www.max-manager.de/daten-extern/sw-erlangen-nuernberg/xml/') parser.define('er-langemarck', suffix='mensa-lmp.xml') parser.define('er-sued', suffix='mensa-sued.xml') parser.define('n-schuett', suffix='mensa-inselschuett.xml') parser.define('n-regens', suffix='mensa-regensburgerstr.xml') parser.define('n-stpaul', suffix='mensateria-st-paul.xml') parser.define('n-mensateria', suffix='mensateria-ohm.xml') parser.define('n-hohfederstr', suffix='cafeteria-come-in.xml') parser.define('n-baerenschanzstr', suffix='cafeteria-baerenschanzstr.xml') parser.define('eichstaett', suffix='mensa-eichstaett.xml') parser.define('ingolstadt', suffix='mensa-ingolstadt.xml') parser.define('ansbach', suffix='mensa-ansbach.xml') parser.define('triesdorf', suffix='mensateria-triesdorf.xml')
# NOTE(review): collapsed muenchen module tail (reformatted variant of the
# earlier muenchen chunk) — legend-expanded notes, per-category prices,
# day-by-day advance. Enclosing loop/def not visible.
canteen.addMeal(date, category, name, [legend.get(n, n) for n in notes], prices.get(price_category, {})) date += datetime.timedelta(days=1) if today: break return canteen.toXMLFeed() parser = Parser('muenchen', handler=parse_url, shared_prefix=base + '/speiseplan/') parser.define('leopoldstrasse', suffix='speiseplan_{}_411_-de.html') parser.define('martinsried', suffix='speiseplan_{}_412_-de.html') parser.define('grosshadern', suffix='speiseplan_{}_414_-de.html') parser.define('schellingstrasse', suffix='speiseplan_{}_416_-de.html') parser.define('archisstrasse', suffix='speiseplan_{}_421_-de.html') parser.define('garching', suffix='speiseplan_{}_422_-de.html') parser.define('weihenstephan', suffix='speiseplan_{}_423_-de.html') parser.define('lothstrasse', suffix='speiseplan_{}_431_-de.html') parser.define('pasing', suffix='speiseplan_{}_432_-de.html') parser.define('rosenheim', suffix='speiseplan_{}_441_-de.html') parser.define('adalbertstrasse', suffix='speiseplan_{}_512_-de.html') parser.define('cafeteria-garching', suffix='speiseplan_{}_524_-de.html') parser.define('wst', suffix='speiseplan_{}_525_-de.html') parser.define('akademie', suffix='speiseplan_{}_526_-de.html') parser.define('boltzmannstrasse', suffix='speiseplan_{}_527_-de.html') parser.define('karlstrasse', suffix='speiseplan_{}_532_-de.html')
# NOTE(review): collapsed aachen module tail (double-quote variant) — iterates
# the ten weekday divs ('montag'..'freitagNaechste'), pairing each with its
# anchor headline for parse_day. Enclosing def not visible.
days = ( "montag", "dienstag", "mittwoch", "donnerstag", "freitag", "montagNaechste", "dienstagNaechste", "mittwochNaechste", "donnerstagNaechste", "freitagNaechste", ) for day in days: data = document.find("div", id=day) headline = document.find("a", attrs={"data-anchor": "#" + day}) parse_day(canteen, headline.text, data) return canteen.toXMLFeed() parser = Parser("aachen", handler=parse_url, shared_prefix="http://www.studentenwerk-aachen.de/speiseplaene/") parser.define("academica", suffix="academica-w.html") parser.define("ahorn", suffix="ahornstrasse-w.html") parser.define("templergraben", suffix="templergraben-w.html") parser.define("bayernallee", suffix="bayernallee-w.html") parser.define("eups", suffix="eupenerstrasse-w.html") parser.define("goethe", suffix="goethestrasse-w.html") parser.define("vita", suffix="vita-w.html") parser.define("zeltmensa", suffix="forum-w.html") parser.define("juelich", suffix="juelich-w.html")
# NOTE(review): collapsed hannover module tail (single-quote variant) — starts
# mid-loop with a `continue`; same &wann=2/&wann=3 week fetching as the earlier
# hannover chunk. Enclosing loop/def not visible.
continue notes.append(legends[notematch]) canteen.addMeal(date, category, name, notes, price_regex.findall(line), roles) def parse_url(url, today=False): canteen = LazyBuilder() parse_week(url + '&wann=2', canteen) if not today: parse_week(url + '&wann=3', canteen) return canteen.toXMLFeed() parser = Parser( 'hannover', handler=parse_url, shared_prefix='http://www.stwh-portal.de/mensa/index.php?format=txt&wo=') parser.define('hauptmensa', suffix='2') parser.define('hauptmensa-marktstand', suffix='9') parser.define('restaurant-ct', suffix='10') parser.define('contine', suffix='3') parser.define('pzh', suffix='13') parser.define('caballus', suffix='1') parser.define('tiho-tower', suffix='0') parser.define('hmtmh', suffix='8') parser.define('ricklinger-stadtweg', suffix='6') parser.define('kurt-schwitters-forum', suffix='7') parser.define('blumhardtstrasse', suffix='14') parser.define('herrenhausen', suffix='12')
# NOTE(review): collapsed leipzig XML-interface tail — leading '#' swallows the
# rest in this mangled form; original added one meal per selectable component
# and parse_url polls 21 days via &date=YYYY-MM-DD. Kept verbatim.
# multiple components to choose from for component in components: canteen.addMeal(date, category, component, tags, prices) else: print('unknown meal type: {}'.format(group['type'])) def parse_url(url, today=False): canteen = LazyBuilder() day = datetime.date.today() for _ in range(21): parse_day(canteen, '{}&date={}'.format(url, day.strftime('%Y-%m-%d'))) if today: break day += datetime.timedelta(days=1) return canteen.toXMLFeed() parser = Parser('leipzig', handler=parse_url, shared_prefix='https://www.studentenwerk-leipzig.de/XMLInterface/request?location=') parser.define('dittrichring', suffix='153') parser.define('philipp-rosenthal-strasse', suffix='127') parser.define('academica', suffix='118') parser.define('am-park', suffix='106') parser.define('am-elsterbecken', suffix='115') parser.define('liebigstrasse', suffix='162') parser.define('peterssteinweg', suffix='111') parser.define('schoenauer-strasse', suffix='140') parser.define('tierklinik', suffix='170')
# NOTE(review): collapsed wuerzburg module tail (third variant; newer URLs,
# two canteens commented out, prices keyed by `v` rather than a mapped role —
# compare with the other wuerzburg chunks in this file before unifying).
if price: prices[v] = price canteen.addMeal(date, category, name, notes, prices) if closed_candidate and not canteen.hasMealsFor(date): canteen.setDayClosed(date) return canteen.toXMLFeed() parser = Parser( 'wuerzburg', handler=parse_url, shared_prefix= 'https://www.studentenwerk-wuerzburg.de/essen-trinken/speiseplaene/plan/') parser.define('austrasse', suffix='austrasse-bamberg.html') parser.define('burse', suffix='burse-am-studentenhaus-wuerzburg.html') parser.define('feldkirchenstrasse', suffix='feldkirchenstrasse-bamberg.html') #parser.define('frankenstube', suffix='frankenstube-wuerzburg.html') #parser.define('hubland', suffix='mensa-am-hubland-wuerzburg.html') parser.define('studentenhaus', suffix='mensa-am-studentenhaus.html') parser.define('aschaffenburg', suffix='mensa-aschaffenburg.html') parser.define('augenklinik', suffix='mensa-roentgenring-wuerzburg.html') parser.define('josef-schneider', suffix='mensa-josef-schneider-strasse-wuerzburg.html') parser.define('schweinfurt', suffix='mensa-schweinfurt.html') parser.define('mensateria', suffix='mensateria-campus-hubland-nord-wuerzburg.html') parser.define('philo', suffix='philosophie-wuerzburg.html') parser.define('sprachenzentrum', suffix='interimsmensa-im-sprachenzentrum-ab-9-april-2018.html')
# NOTE(review): collapsed erlangen_nuernberg HTML tail (variant) — same cell
# extraction as the earlier chunk but skips the meal when food_type is None.
# Enclosing try/loop/def not visible.
food_type = parse_foot_type(tds[2]) food_description = get_foot_description(tds[3]) notes_string = build_notes_string(tds[3]) if(notes_string != ""): notes.append(notes_string) prices = get_pricing(tds, 4, 7) if food_type is not None: canteen.addMeal(date, food_type, food_description, notes, prices, roles if prices else None) except Exception as e: traceback.print_exception(*sys.exc_info()) return canteen.toXMLFeed() parser = Parser('erlangen_nuernberg', handler=parse_url, shared_prefix='http://www.studentenwerk.uni-erlangen.de/verpflegung/de/') parser.define('er-langemarck', suffix='sp-er-langemarck.shtml') parser.define('er-sued', suffix='sp-er-sued.shtml') parser.define('n-schuett', suffix='sp-n-schuett.shtml') parser.define('n-regens', suffix='sp-n-regens.shtml') parser.define('n-stpaul', suffix='sp-n-stpaul.shtml') parser.define('n-mensateria', suffix='sp-n-mensateria.shtml') parser.define('n-hohfederstr', suffix='sp-n-hohfederstr.shtml') parser.define('n-baerenschanzstr', suffix='sp-n-baerenschanzstr.shtml') parser.define('eichstaett', suffix='sp-eichstaett.shtml') parser.define('ingolstadt', suffix='sp-ingolstadt.shtml') parser.define('ansbach', suffix='sp-ansbach.shtml') parser.define('triesdorf', suffix='sp-triesdorf.shtml')
# NOTE(review): collapsed karlsruhe module tail (older http:// variant of the
# earlier karlsruhe chunk; notes extraction still marked "to do") — same
# ISO-week advancing parse_url; nesting ambiguous in this mangled form.
name = meal_tr.contents[1].text # notes, to do canteen.addMeal(date, category, name, [], price_regex.findall(meal_tr.contents[2].text), roles) def parse_url(url, place_class=None, today=False): canteen = OpenMensaCanteen() parse_week(canteen, url, place_class) day = datetime.date.today() old = -1 day += datetime.date.resolution * 7 if not today: parse_week(canteen, '{}?kw={}'.format(url, day.isocalendar()[1]), place_class) day += datetime.date.resolution * 7 while not today and old != canteen.dayCount(): old = canteen.dayCount() parse_week(canteen, '{}?kw={}'.format(url, day.isocalendar()[1]), place_class) day += datetime.date.resolution * 7 return canteen.toXMLFeed() parser = Parser('karlsruhe', handler=parse_url, shared_args=['http://www.studentenwerk-karlsruhe.de/de/essen/']) parser.define('adenauerring', args=['canteen_place_1']) parser.define('moltke', args=['canteen_place_2']) parser.define('erzbergerstrasse', args=['canteen_place_3']) parser.define('schloss-gottesaue', args=['canteen_place_4']) parser.define('tiefenbronner-strasse', args=['canteen_place_5']) parser.define('holzgartenstrasse', args=['canteen_place_6'])
# NOTE(review): collapsed darmstadt module tail (third variant; newer site) —
# per-item location/title/price extraction with a ValueError guard, a simpler
# parse_url, and a __main__ debug hook that prints the stadtmitte feed.
location_tag = meal_item.find('span', {'class': 'fmc-item-location'}) price_tag = meal_item.find('span', {'class': 'fmc-item-price'}) try: canteen.addMeal(date, category=location_tag.string, name=title_tag.string, prices=price_tag.string) except ValueError as e: print('Error adding meal "{}": {}'.format(meal_item, e)) def parse_url(url, today=False): canteen = LazyBuilder() canteen.setAdditionalCharges('student', {}) parse_week(url, canteen) return canteen.toXMLFeed() parser = Parser('darmstadt', handler=parse_url, shared_prefix='http://studierendenwerkdarmstadt.de/hochschulgastronomie/speisekarten/') parser.define('stadtmitte', suffix='stadtmitte/') parser.define('lichtwiese', suffix='lichtwiese/') parser.define('schoefferstrasse', suffix='schoefferstrasse/') parser.define('dieburg', suffix='dieburg/') parser.define('haardtring', suffix='haardtring/') # for debugging / testing if __name__ == "__main__": print(parser.parse("darmstadt", "stadtmitte", None))