示例#1
0
def parse_url(url, today=False):
    """Build an XML feed from the legend, price list and daily menu pages.

    The legend is seeded from the additives page, prices are read from the
    price table, and then one menu page per day is fetched, starting with
    today's date. Days answering HTTP 404 are skipped; the walk ends after
    seven consecutive 404 responses.

    Args:
        url: Format string of the daily menu page; it is filled with the
            date through ``url.format(date)``.
        today: When true, stop after the first day that could be fetched.

    Returns:
        The result of ``canteen.toXMLFeed()``.

    Raises:
        HTTPError: For any HTTP error other than 404 on a daily page.
    """
    canteen = LazyBuilder()

    # Legend: two fixed entries plus everything listed on the additives page.
    legend = {'f': 'fleischloses Gericht', 'v': 'veganes Gericht'}
    document = parse(urlopen(base + '/speiseplan/zusatzstoffe-de.html').read())
    for td in document.find_all('td', 'beschreibung'):
        # The key is read from the node two siblings before the description.
        legend[td.previous_sibling.previous_sibling.text] = td.text

    # Prices: meal title -> {role: price}.
    document = parse(urlopen(base + '/unsere-preise/').read())
    roles = ('student', 'employee', 'other')
    prices = {}
    for tr in document.find('table', 'essenspreise').find_all('tr'):
        meal = tr.find('th')
        if not meal or not meal.text.strip():
            continue
        amounts = tr.find_all('td', 'betrag')
        if len(amounts) < 3:
            continue
        css_classes = meal.attrs.get('class', [])
        if 'titel' in css_classes or 'zeilentitel' in css_classes:
            continue
        meal = meal.text.strip()
        prices[meal] = {}
        # The first three amount cells map to the three roles, in order.
        for role, amount in zip(roles, amounts):
            price_search = price_regex.search(amount.text)
            if price_search:
                prices[meal][role] = price_search.group('price')

    errorCount = 0
    date = datetime.date.today()
    while errorCount < 7:
        try:
            document = parse(urlopen(url.format(date)).read())
        except HTTPError as e:
            if e.code != 404:
                raise
            # No page for this day: count the miss and try the next day.
            errorCount += 1
            date += datetime.date.resolution
            continue
        errorCount = 0

        # Each daily page carries its own additives table; merge it in.
        for tr in document.find('table', 'zusatzstoffe').find_all('tr'):
            cells = tr.find_all('td')
            identifier = cells[0].text.replace('(', '').replace(')', '')
            legend[identifier] = cells[1].text.strip()
        canteen.setLegendData(legend)

        category = None
        for menu_tr in document.find('table', 'menu').find_all('tr'):
            if menu_tr.find('td', 'headline'):
                continue
            # An empty category cell means the row continues the previous
            # category.
            heading = menu_tr.find('td', 'gericht').text
            if heading:
                category = heading
            data = menu_tr.find('td', 'beschreibung')
            name = data.find('span').text.strip()
            if not name:
                # Rows without a meal name carry nothing worth reporting.
                continue
            notes = [span['title'] for span in data.find_all('span', title=True)]
            canteen.addMeal(
                date, category, name, notes,
                prices.get(category.replace('Aktionsessen', 'Bio-/Aktionsgericht'), {})
            )
        date += datetime.date.resolution
        if today:
            break
    return canteen.toXMLFeed()
def parse_url(url, today=False, canteentype='Mittagsmensa', this_week='', next_week=True, legend_url=None):
    """Build an XML feed for the current week and, optionally, a second week.

    Args:
        url: Base URL of the menu page; week suffixes are appended to it.
        today: When true, only the current week page is parsed.
        canteentype: Passed through unchanged to ``parse_week``.
        this_week: Suffix appended to ``url`` for the current week page.
        next_week: ``True`` parses ``url + '-kommende-woche'``; a string is
            used as the suffix instead; any other value skips the second
            page.
        legend_url: Page holding the legend text. When falsy it is derived
            from ``url``: everything up to and including the first
            ``'essen/'`` followed by ``'lebensmittelkennzeichnung'``.

    Returns:
        The result of ``canteen.toXMLFeed()``.
    """
    canteen = LazyBuilder()
    canteen.legendKeyFunc = lambda v: v.lower()
    if not legend_url:
        # NOTE(review): if 'essen/' is absent, find() returns -1 and the
        # prefix silently becomes url[:5] -- confirm callers always pass a
        # URL containing 'essen/'.
        legend_url = url[:url.find('essen/') + 6] + 'lebensmittelkennzeichnung'
    legend_doc = parse(urlopen(legend_url))
    canteen.setLegendData(
        text=legend_doc.find(id='artikel').text,
        regex=r'(?P<name>(\d+|[A-Z]+))\s+=\s+(?P<value>\w+( |\t|\w)*)'
    )
    parse_week(url + this_week, canteen, canteentype)
    if not today:
        if next_week is True:
            parse_week(url + '-kommende-woche', canteen, canteentype)
        elif isinstance(next_week, str):
            parse_week(url + next_week, canteen, canteentype)
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Collect meals day by day, starting today, and return the XML feed.

    ``parse_day`` is called once per day with the builder, the day-specific
    URL and the ISO-formatted date. The walk stops after seven consecutive
    days for which ``parse_day`` returned a falsy value, after 32 days in
    total, or right after the first day when ``today`` is true.
    """
    global legend
    builder = LazyBuilder()
    builder.setLegendData(legend)

    current = datetime.date.today()
    empty_streak = 0
    days_checked = 0
    while empty_streak < 7 and days_checked < 32:
        day_url = '{}&tag={}&monat={}&jahr={}'.format(
            url, current.day, current.month, current.year)
        found = parse_day(builder, day_url, current.strftime('%Y-%m-%d'))
        # A day with data resets the streak of empty days.
        empty_streak = 0 if found else empty_streak + 1
        if today:
            break
        days_checked += 1
        current += datetime.date.resolution
    return builder.toXMLFeed()
示例#4
0
def parse_url(url, today=False):
    """Return the XML feed built from up to 32 consecutive daily pages.

    Starting with today's date, each day is handed to ``parse_day`` together
    with the builder and the day-specific URL. Parsing ends early once seven
    days in a row yielded a falsy ``parse_day`` result, or after the first
    day when ``today`` is true.
    """
    global legend
    feed_builder = LazyBuilder()
    feed_builder.setLegendData(legend)

    one_day = datetime.date.resolution
    current = datetime.date.today()
    misses_in_a_row = 0
    for _ in range(32):
        if misses_in_a_row >= 7:
            break
        target = '{}&tag={}&monat={}&jahr={}'.format(
            url, current.day, current.month, current.year)
        if parse_day(feed_builder, target, current.strftime('%Y-%m-%d')):
            misses_in_a_row = 0
        else:
            misses_in_a_row += 1
        if today:
            break
        current += one_day
    return feed_builder.toXMLFeed()
示例#5
0
def parse_url(url, today=False):
    """Fetch one page per weekday via POST and return the XML feed.

    Args:
        url: Endpoint that receives the ``tx_pamensa_mensa[date]`` form
            field for each day.
        today: Accepted for interface compatibility; this function does not
            read it, so every day returned by ``getWeekdays`` is requested.

    Returns:
        The result of ``canteen.toXMLFeed()``.
    """
    canteen = LazyBuilder()

    # Raw string literals: '\(' and '\)' are invalid escape sequences in a
    # normal string literal. The compiled pattern is unchanged.
    canteen.extra_regex = re.compile(r'\((?P<extra>[0-9a-zA-Z]{1,3}'
                                     r'(?:,[0-9a-zA-Z]{1,3})*)\)', re.UNICODE)

    legend_url = 'https://www.stwdo.de/mensa-co/allgemein/zusatzstoffe/'
    legend = parse_legend(legend_url)
    canteen.setLegendData(legend)

    day = datetime.date.today()
    week = getWeekdays(day)

    for wDay in week:
        # Passing a payload makes urlopen issue a POST with the date field.
        py = {'tx_pamensa_mensa[date]': wDay}
        payload = urlencode(py).encode('ascii')
        data = rq.urlopen(url, payload).read().decode('utf-8')
        soup = BeautifulSoup(data, 'html.parser')
        parse_day(canteen, soup, wDay)

    return canteen.toXMLFeed()
示例#6
0
def parse_url(url, today=False):
    """Request the menu for every day of the current week and build the feed.

    Args:
        url: Endpoint queried once per day; the date is sent as the
            ``tx_pamensa_mensa[date]`` form field.
        today: Not read by this function; kept so the signature matches the
            other parsers. All days from ``getWeekdays`` are requested.

    Returns:
        The result of ``canteen.toXMLFeed()``.
    """
    canteen = LazyBuilder()

    # The pattern is written with raw string literals because '\(' and '\)'
    # are invalid escape sequences otherwise; the pattern text is unchanged.
    canteen.extra_regex = re.compile(
        r'\((?P<extra>[0-9a-zA-Z]{1,3}'
        r'(?:,[0-9a-zA-Z]{1,3})*)\)', re.UNICODE)

    legend_url = 'https://www.stwdo.de/mensa-co/allgemein/zusatzstoffe/'
    canteen.setLegendData(parse_legend(legend_url))

    for wDay in getWeekdays(datetime.date.today()):
        # Form-encode the date; supplying a body turns the request into POST.
        payload = urlencode({'tx_pamensa_mensa[date]': wDay}).encode('ascii')
        data = rq.urlopen(url, payload).read().decode('utf-8')
        soup = BeautifulSoup(data, 'html.parser')
        parse_day(canteen, soup, wDay)

    return canteen.toXMLFeed()
示例#7
0
def parse_url(url, today=False):
    """Assemble the XML feed from the legend, price list and daily menus.

    Steps:
      1. Seed the legend from the additives page.
      2. Read the price per role for every meal title on the price page.
      3. Fetch one menu page per day (``url.format(date)``), starting today.
         A 404 response skips the day; seven 404s in a row end the walk, and
         any other HTTP error is re-raised. When ``today`` is true, the walk
         stops after the first day that could be fetched.

    Returns the result of the builder's ``toXMLFeed()``.
    """
    builder = LazyBuilder()

    # --- legend -----------------------------------------------------------
    additives = {'f': 'fleischloses Gericht', 'v': 'veganes Gericht'}
    page = parse(urlopen(base + '/speiseplan/zusatzstoffe-de.html').read())
    for description in page.find_all('td', 'beschreibung'):
        key_cell = description.parent.find('td', 'gericht')
        additives[key_cell.text] = description.text

    # --- prices: meal title -> {role: price} --------------------------------
    page = parse(urlopen(base + '/mensa-preise/').read())
    role_names = ('student', 'employee', 'other')
    price_table = {}
    for row in page.find('div', 'ce-bodytext').find_all('tr'):
        header = row.find('th')
        if not header or not header.text.strip():
            continue
        amount_cells = row.find_all('td', 'betrag')
        if len(amount_cells) < 3:
            continue
        header_classes = header.attrs.get('class', [])
        if 'titel' in header_classes or 'zeilentitel' in header_classes:
            continue
        row_prices = {}
        price_table[header.text.strip()] = row_prices
        # Only the first three amount cells are used, one per role.
        for role, cell in zip(role_names, amount_cells):
            match = price_regex.search(cell.text)
            if match:
                row_prices[role] = match.group('price')

    # --- daily menus --------------------------------------------------------
    one_day = datetime.date.resolution
    day = datetime.date.today()
    misses = 0
    while misses < 7:
        try:
            page = parse(urlopen(url.format(day)).read())
        except HTTPError as error:
            if error.code != 404:
                raise error
            misses += 1
            day += one_day
            continue
        misses = 0

        # The daily page has its own additives table; fold it into the legend.
        for row in page.find('table', 'zusatzstoffe').find_all('tr'):
            cells = row.find_all('td')
            key = cells[0].text.replace('(', '').replace(')', '')
            additives[key] = cells[1].text.strip()
        builder.setLegendData(additives)

        category = None
        for row in page.find('table', 'menu').find_all('tr'):
            if row.find('td', 'headline'):
                continue
            # Rows with an empty category cell keep the previous category.
            heading = row.find('td', 'gericht').text
            if heading:
                category = heading
            cell = row.find('td', 'beschreibung')
            name = cell.find('span').text.strip()
            if not name:
                continue
            notes = [span['title'] for span in cell.find_all('span', title=True)]
            price_key = category.replace('Aktionsessen', 'Bio-/Aktionsgericht')
            builder.addMeal(day, category, name, notes,
                            price_table.get(price_key, {}))
        day += one_day
        if today:
            break
    return builder.toXMLFeed()