def parse_url(url, today=False, canteentype="Mittagsmensa", this_week="", next_week=True, legend_url=None):
    """Scrape a canteen's menu pages and return the OpenMensa XML feed.

    Args:
        url: Base URL of the canteen's menu page.
        today: If True, only the current week is parsed.
        canteentype: Heading text identifying the canteen section on the page.
        this_week: URL suffix appended to ``url`` for the current week.
        next_week: ``True`` to parse the default next-week page
            (``-kommende-woche``), a string to use it as the URL suffix,
            or any other value to skip the next week.
        legend_url: Explicit URL of the legend page; derived from ``url``
            when omitted.

    Returns:
        The assembled feed as an XML string.
    """
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        # NOTE(review): assumes 'essen/' occurs in url — if it does not,
        # find() returns -1 and the slice is wrong; confirm with callers.
        legend_url = url[: url.find("essen/") + 6] + "wissenswertes/lebensmittelkennzeichnung"
    legend_doc = parse(urlopen(legend_url)).find(id="artikel")
    legend_text = legend_doc.text.replace("\xa0", " ")
    # Allergens: uppercase keys followed by "enthält <description>".
    allergene = buildLegend(
        text=legend_text,
        regex=r"(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)"
    )
    allergene["EI"] = "Ei"  # not captured by the regex on the legend page
    # Additives: numeric keys, optional "enthält" prefix.
    zusatzstoffe = buildLegend(
        text=legend_text,
        regex=r"(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)"
    )
    # Supplement both legends from the two-column legend table.
    for tr in legend_doc.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) != 2:
            continue
        title = tds[0].find("strong")
        if title is None:
            continue
        title = title.text
        text = tds[1].text.replace("enthält", "").strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week, canteen, canteentype,
               allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + "-kommende-woche", canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and isinstance(next_week, str):
        parse_week(url + next_week, canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    # Fixed: removed leftover debug print, which also built the XML feed
    # a second time; the feed is now generated exactly once.
    return canteen.toXMLFeed()
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    """Scrape a canteen's menu pages and return the OpenMensa XML feed.

    Args:
        url: Base URL of the canteen's menu page.
        today: If True, only the current week is parsed.
        canteentype: Heading text identifying the canteen section on the page.
        this_week: URL suffix appended to ``url`` for the current week.
        next_week: ``True`` to parse the default next-week page
            (``-kommende-woche``), a string to use it as the URL suffix,
            or any other value to skip the next week.
        legend_url: Explicit URL of the legend page; derived from ``url``
            when omitted.

    Returns:
        The assembled feed as an XML string.
    """
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        # NOTE(review): assumes 'essen/' occurs in url — if it does not,
        # find() returns -1 and the slice is wrong; confirm with callers.
        legend_url = url[:url.find('essen/') + 6] + 'wissenswertes/lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url), 'lxml').find(id='artikel')
    # Hoisted: normalize non-breaking spaces once instead of per legend.
    legend_text = legend_doc.text.replace('\xa0', ' ')
    # Allergens: uppercase keys followed by "enthält <description>".
    allergene = buildLegend(
        text=legend_text,
        regex=r'(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)')
    allergene['EI'] = 'Ei'  # not captured by the regex on the legend page
    # Additives: numeric keys, optional "enthält" prefix.
    zusatzstoffe = buildLegend(
        text=legend_text,
        regex=r'(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)')
    # Sub-allergen lines inside a table cell, e.g. "A1 enthält Weizen".
    suballergene = re.compile(
        r'(?P<name>[0-9A-Z]+)[^a-zA-Z]*enthält (?P<value>\w+( |\t|\w)*)')
    # Supplement both legends from the two-column legend table.
    for tr in legend_doc.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) != 2:
            continue
        title = tds[0].find('strong')
        if title is None:
            continue
        title = title.text
        lines = tds[1].text.split('\n')
        # Lines after the first describe sub-allergens of this entry.
        for line in lines[1:]:
            try_allergine = suballergene.match(line)
            if try_allergine:
                allergene[try_allergine.group('name')] = try_allergine.group('value')
        text = lines[0].replace('enthält', '').strip()
        if title.isdigit():
            zusatzstoffe[title] = text
        else:
            allergene[title] = text
    parse_week(url + this_week, canteen, canteentype,
               allergene=allergene, zusatzstoffe=zusatzstoffe)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche', canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    # Fixed: isinstance instead of exact type comparison (accepts str subclasses).
    if not today and isinstance(next_week, str):
        parse_week(url + next_week, canteen, canteentype,
                   allergene=allergene, zusatzstoffe=zusatzstoffe)
    return canteen.toXMLFeed()
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    """Build and return the OpenMensa XML feed for one canteen.

    Parses the current week's menu at ``url + this_week``; unless ``today``
    is set, additionally parses the following week (default suffix
    ``-kommende-woche``, or ``next_week`` itself when it is a string).
    The legend is read from ``legend_url`` or derived from ``url``.
    """
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        # NOTE(review): assumes 'essen/' occurs in url — verify with callers.
        base = url[:url.find('essen/') + 6]
        legend_url = base + 'lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url))
    canteen.setLegendData(
        text=legend_doc.find(id='artikel').text,
        regex=r'(?P<name>(\d+|[A-Z]+))\s+=\s+(?P<value>\w+( |\t|\w)*)'
    )
    parse_week(url + this_week, canteen, canteentype)
    if not today:
        if next_week is True:
            parse_week(url + '-kommende-woche', canteen, canteentype)
        if type(next_week) is str:
            parse_week(url + next_week, canteen, canteentype)
    return canteen.toXMLFeed()
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    """Scrape a canteen's menu pages and return the OpenMensa XML feed.

    Reads the allergen/additive legend (from ``legend_url`` or a URL derived
    from ``url``), then parses the current week and — unless ``today`` is
    set — the following week (default suffix ``-kommende-woche``, or
    ``next_week`` itself when it is a string).
    """
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        legend_url = url[:url.find('essen/') + 6] + 'wissenswertes/lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url), 'lxml').find(id='artikel')
    normalized = legend_doc.text.replace('\xa0', ' ')
    # Allergens: uppercase keys followed by "enthält <description>".
    allergens = buildLegend(
        text=normalized,
        regex=r'(?P<name>[A-Z]+) {3,}enthält (?P<value>\w+( |\t|\w)*)'
    )
    allergens['EI'] = 'Ei'
    # Additives: numeric keys, optional "enthält" prefix.
    additives = buildLegend(
        text=normalized,
        regex=r'(?P<name>\d+) {3,} (enthält )?(?P<value>\w+( |\t|\w)*)'
    )
    sub_pattern = re.compile(r'(?P<name>[0-9A-Z]+)[^a-zA-Z]*enthält (?P<value>\w+( |\t|\w)*)')
    # Supplement both legends from the two-column legend table.
    for row in legend_doc.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 2:
            continue
        key_tag = cells[0].find('strong')
        if key_tag is None:
            continue
        key = key_tag.text
        cell_lines = cells[1].text.split('\n')
        # Lines after the first describe sub-allergens of this entry.
        for extra in cell_lines[1:]:
            hit = sub_pattern.match(extra)
            if hit:
                allergens[hit.group('name')] = hit.group('value')
        description = cell_lines[0].replace('enthält', '').strip()
        if key.isdigit():
            additives[key] = description
        else:
            allergens[key] = description
    parse_week(url + this_week, canteen, canteentype,
               allergene=allergens, zusatzstoffe=additives)
    if not today and next_week is True:
        parse_week(url + '-kommende-woche', canteen, canteentype,
                   allergene=allergens, zusatzstoffe=additives)
    if not today and type(next_week) is str:
        parse_week(url + next_week, canteen, canteentype,
                   allergene=allergens, zusatzstoffe=additives)
    return canteen.toXMLFeed()