def parse_url(url, today=False):
    """Build an OpenMensa XML feed for a canteen.

    Scrapes the site-wide legend and price tables once, then walks forward
    day by day from today, fetching each daily menu page until 7 consecutive
    days return HTTP 404.

    :param url: day-page URL template; formatted with a ``datetime.date``
    :param today: if True, stop after the first successfully parsed day
    :return: the XML feed as a string (``canteen.toXMLFeed()``)
    :raises HTTPError: re-raised for any HTTP error other than 404
    """
    canteen = LazyBuilder()
    # Seed the legend with the two markers that never appear in the
    # scraped additives table.
    legend = {'f': 'fleischloses Gericht', 'v': 'veganes Gericht'}
    document = parse(urlopen(base + '/speiseplan/zusatzstoffe-de.html').read())
    for td in document.find_all('td', 'beschreibung'):
        # The identifier cell precedes the description cell (text node in
        # between, hence the double previous_sibling hop).
        legend[td.previous_sibling.previous_sibling.text] = td.text

    # Price table: one row per meal category, three price cells
    # (student / employee / other).
    document = parse(urlopen(base + '/unsere-preise/').read())
    prices = {}
    for tr in document.find('table', 'essenspreise').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        if len(tr.find_all('td', 'betrag')) < 3:
            continue
        # Skip heading rows styled as titles.
        if 'titel' in meal.attrs.get('class', []) or \
                'zeilentitel' in meal.attrs.get('class', []):
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        for role, _id in [('student', 0), ('employee', 1), ('other', 2)]:
            price_html = tr.find_all('td', 'betrag')[_id].text
            price_search = price_regex.search(price_html)
            if price_search:
                prices[meal][role] = price_search.group('price')

    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read())
        except HTTPError as e:
            if e.code == 404:
                # Missing day page: count it and try the next day.
                errorCount += 1
                date += datetime.date.resolution
                continue
            # Bare raise preserves the original traceback (was `raise e`).
            raise
        else:
            errorCount = 0
        # Per-day additives table extends the legend.
        for tr in document.find('table', 'zusatzstoffe').find_all('tr'):
            identifier = tr.find_all('td')[0].text \
                .replace('(', '').replace(')', '')
            legend[identifier] = tr.find_all('td')[1].text.strip()
        canteen.setLegendData(legend)

        mensa_data = document.find('table', 'menu')
        category = None
        for menu_tr in mensa_data.find_all('tr'):
            if menu_tr.find('td', 'headline'):
                continue
            # A non-empty 'gericht' cell starts a new category; follow-up
            # rows reuse the last seen category.
            if menu_tr.find('td', 'gericht').text:
                category = menu_tr.find('td', 'gericht').text
            data = menu_tr.find('td', 'beschreibung')
            name = data.find('span').text.strip()
            if not name:
                # Skip filler rows without a meal name (guard added for
                # consistency with the newer parser variant).
                continue
            notes = [span['title'] for span in data.find_all('span', title=True)]
            canteen.addMeal(
                date, category, name, notes,
                prices.get(
                    category.replace('Aktionsessen', 'Bio-/Aktionsgericht'),
                    {})
            )
        date += datetime.date.resolution
        if today:
            break
    return canteen.toXMLFeed()
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='',
              next_week=True, legend_url=None):
    """Build an OpenMensa XML feed for a week-based canteen site.

    :param url: base URL of the canteen's plan pages
    :param today: if True, parse only the current week (skip next week)
    :param canteentype: page section to parse (passed to ``parse_week``)
    :param this_week: URL suffix for the current week's page
    :param next_week: True to use the default '-kommende-woche' suffix,
        or a string giving an explicit suffix for next week's page
    :param legend_url: legend page URL; derived from ``url`` when None
    :return: the XML feed as a string
    """
    canteen = LazyBuilder()
    # Legend keys on this site vary in case; normalize lookups.
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        # Legend lives next to the '.../essen/' path segment.
        legend_url = url[:url.find('essen/') + 6] + 'lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url))
    canteen.setLegendData(
        text=legend_doc.find(id='artikel').text,
        regex=r'(?P<name>(\d+|[A-Z]+))\s+=\s+(?P<value>\w+( |\t|\w)*)'
    )
    parse_week(url + this_week, canteen, canteentype)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche', canteen, canteentype)
    # isinstance is the idiomatic type check (was `type(next_week) is str`)
    # and also accepts str subclasses.
    if not today and isinstance(next_week, str):
        parse_week(url + next_week, canteen, canteentype)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa XML feed by probing one day page at a time.

    Starts at today's date and keeps fetching subsequent days until either
    7 consecutive days come back empty or 32 days have been probed in total.

    :param url: base query URL; day/month/year parameters are appended
    :param today: if True, stop after the first non-empty day
    :return: the XML feed as a string
    """
    global legend
    canteen = LazyBuilder()
    canteen.setLegendData(legend)
    current = datetime.date.today()
    one_day = datetime.date.resolution
    misses = 0    # consecutive empty days seen so far
    probed = 0    # total days attempted (hard cap of 32)
    while misses < 7 and probed < 32:
        day_url = '{}&tag={}&monat={}&jahr={}'.format(
            url, current.day, current.month, current.year)
        if parse_day(canteen, day_url, current.strftime('%Y-%m-%d')):
            misses = 0
            if today:
                break
        else:
            misses += 1
        probed += 1
        current += one_day
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa XML feed by scanning day pages from today.

    Probes at most 32 days and aborts early after 7 consecutive days with
    no menu data.

    :param url: base query URL; day/month/year parameters are appended
    :param today: if True, stop after the first day that yields data
    :return: the XML feed as a string
    """
    global legend
    canteen = LazyBuilder()
    canteen.setLegendData(legend)
    day = datetime.date.today()
    consecutive_empty = 0
    for _ in range(32):  # hard cap on how far ahead we look
        if consecutive_empty >= 7:
            break
        query = '{}&tag={}&monat={}&jahr={}'.format(
            url, day.day, day.month, day.year)
        if not parse_day(canteen, query, day.strftime('%Y-%m-%d')):
            consecutive_empty += 1
        else:
            consecutive_empty = 0
            if today:
                break
        day += datetime.date.resolution
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa XML feed for a stwdo.de canteen.

    Fetches the additive legend, then POSTs one request per weekday of the
    current week and parses each returned page.

    :param url: menu endpoint accepting a 'tx_pamensa_mensa[date]' POST field
    :param today: accepted for interface compatibility; the full week is
        always fetched
    :return: the XML feed as a string
    """
    canteen = LazyBuilder()
    # Matches additive markers like "(1,a,B2)". Raw string fixes the
    # invalid "\(" escape in a plain string literal, which raises
    # warnings on modern Python.
    canteen.extra_regex = re.compile(
        r'\((?P<extra>[0-9a-zA-Z]{1,3}(?:,[0-9a-zA-Z]{1,3})*)\)',
        re.UNICODE)
    legend_url = 'https://www.stwdo.de/mensa-co/allgemein/zusatzstoffe/'
    legend = parse_legend(legend_url)
    canteen.setLegendData(legend)
    day = datetime.date.today()
    week = getWeekdays(day)
    for wDay in week:
        # The site expects the date as a form-encoded POST parameter.
        py = {'tx_pamensa_mensa[date]': wDay}
        payload = urlencode(py).encode('ascii')
        data = rq.urlopen(url, payload).read().decode('utf-8')
        soup = BeautifulSoup(data, 'html.parser')
        parse_day(canteen, soup, wDay)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa XML feed for a stwdo.de canteen.

    Loads the additive legend once, then POSTs the menu endpoint for each
    weekday of the current week and parses every response.

    :param url: menu endpoint accepting a 'tx_pamensa_mensa[date]' POST field
    :param today: accepted for interface compatibility; the full week is
        always fetched
    :return: the XML feed as a string
    """
    canteen = LazyBuilder()
    # Raw string: '\(' in a non-raw literal is an invalid escape sequence
    # and warns on modern Python (bug fix).
    canteen.extra_regex = re.compile(
        r'\((?P<extra>[0-9a-zA-Z]{1,3}'
        r'(?:,[0-9a-zA-Z]{1,3})*)\)', re.UNICODE)
    legend_url = 'https://www.stwdo.de/mensa-co/allgemein/zusatzstoffe/'
    legend = parse_legend(legend_url)
    canteen.setLegendData(legend)
    day = datetime.date.today()
    week = getWeekdays(day)
    for wDay in week:
        # Request each day's plan via a form-encoded POST.
        py = {'tx_pamensa_mensa[date]': wDay}
        payload = urlencode(py).encode('ascii')
        data = rq.urlopen(url, payload).read().decode('utf-8')
        soup = BeautifulSoup(data, 'html.parser')
        parse_day(canteen, soup, wDay)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Build an OpenMensa XML feed for a canteen.

    Scrapes the site-wide legend and price tables once, then walks forward
    day by day from today, fetching each daily menu page until 7 consecutive
    days return HTTP 404.

    :param url: day-page URL template; formatted with a ``datetime.date``
    :param today: if True, stop after the first successfully parsed day
    :return: the XML feed as a string (``canteen.toXMLFeed()``)
    :raises HTTPError: re-raised for any HTTP error other than 404
    """
    canteen = LazyBuilder()
    # Seed the legend with markers that do not appear in the scraped table.
    legend = {'f': 'fleischloses Gericht', 'v': 'veganes Gericht'}
    document = parse(urlopen(base + '/speiseplan/zusatzstoffe-de.html').read())
    for td in document.find_all('td', 'beschreibung'):
        # Identifier lives in the sibling 'gericht' cell of the same row.
        legend[td.parent.find('td', 'gericht').text] = td.text

    # Price table: one row per meal category, three price cells
    # (student / employee / other).
    document = parse(urlopen(base + '/mensa-preise/').read())
    prices = {}
    for tr in document.find('div', 'ce-bodytext').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        if len(tr.find_all('td', 'betrag')) < 3:
            continue
        # Skip heading rows styled as titles.
        if 'titel' in meal.attrs.get('class', []) or \
                'zeilentitel' in meal.attrs.get('class', []):
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        for role, _id in [('student', 0), ('employee', 1), ('other', 2)]:
            price_html = tr.find_all('td', 'betrag')[_id].text
            price_search = price_regex.search(price_html)
            if price_search:
                prices[meal][role] = price_search.group('price')

    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read())
        except HTTPError as e:
            if e.code == 404:
                # Missing day page: count it and try the next day.
                errorCount += 1
                date += datetime.date.resolution
                continue
            # Bare raise preserves the original traceback (was `raise e`).
            raise
        else:
            errorCount = 0
        # Per-day additives table extends the legend.
        for tr in document.find('table', 'zusatzstoffe').find_all('tr'):
            identifier = tr.find_all('td')[0].text \
                .replace('(', '').replace(')', '')
            legend[identifier] = tr.find_all('td')[1].text.strip()
        canteen.setLegendData(legend)

        mensa_data = document.find('table', 'menu')
        category = None
        for menu_tr in mensa_data.find_all('tr'):
            if menu_tr.find('td', 'headline'):
                continue
            # A non-empty 'gericht' cell starts a new category; follow-up
            # rows reuse the last seen category.
            if menu_tr.find('td', 'gericht').text:
                category = menu_tr.find('td', 'gericht').text
            data = menu_tr.find('td', 'beschreibung')
            name = data.find('span').text.strip()
            if not name:
                # Filler row without a meal name.
                continue
            notes = [
                span['title'] for span in data.find_all('span', title=True)
            ]
            canteen.addMeal(
                date, category, name, notes,
                prices.get(
                    category.replace('Aktionsessen', 'Bio-/Aktionsgericht'),
                    {}))
        date += datetime.date.resolution
        if today:
            break
    # Rejoined the `return` statement that was split across source lines.
    return canteen.toXMLFeed()