示例#1
0
def parse_url(url, today=False):
    """Fetch the menu page at *url* and return ``canteen.toXMLFeed()``.

    Every ``div.day`` and every ``article`` carrying a ``data-day`` attribute
    is handled as one day.  A day is marked closed when it has the ``nodata``
    class, when its text contains ``GESCHLOSSEN``, or when one of its meal
    titles contains ``geschlossen`` and no real meal was added for that date.

    ``today`` is accepted for interface compatibility; it is not used here.
    """
    canteen = LazyBuilder()
    document = parse(urlopen(url).read())
    # One timestamp for the whole run so year and month always belong together.
    now = datetime.datetime.now()
    day_nodes = document.find_all('div', 'day') + document.find_all(
        'article', attrs={'data-day': True})
    for day_div in day_nodes:
        # ``div.day`` elements are matched by class only, so ``data-day`` may
        # be absent; an empty string simply fails the regex below.
        date_test = day_regex.search(day_div.get('data-day', ''))
        if not date_test:
            print('Error: unable to parse date')
            continue

        # The page gives day and month only, so the year has to be inferred.
        # Only a gap of more than half a year is treated as a year change;
        # otherwise a date from last month (same week) would wrongly be
        # pushed into next year.
        month = int(date_test.group('month'))
        year = now.year
        if now.month - month > 6:
            year += 1  # e.g. January menu viewed in December
        elif month - now.month > 6:
            year -= 1  # e.g. December menu viewed in January
        date = "{}-{}-{}".format(
            year,
            date_test.group('month'),
            date_test.group('day'),
        )

        if ('nodata' in day_div.attrs.get('class', [])
                or 'GESCHLOSSEN' in day_div.text):
            canteen.setDayClosed(date)
            continue

        closed_candidate = False
        for meal_article in day_div.find_all('article', 'menu'):
            title_div = meal_article.find('div', 'title')
            name = title_div.text if title_div is not None else ''
            if not name:
                continue
            if 'geschlossen' in name:
                # Only a hint: the day is closed if no other meal shows up.
                closed_candidate = True
                continue

            category = meal_article.find('div')['title']
            notes = [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title']
            ]
            additive_div = meal_article.find('div', 'additive')
            if additive_div is not None:
                notes += [
                    v[0] for v in extra_regex.findall(additive_div.text)
                ]

            price_div = meal_article.find('div', 'price')
            if price_div is None:
                canteen.addMeal(date, category, name, notes)
                continue

            prices = {}
            for attr, role in (('default', 'student'), ('bed', 'employee'),
                               ('guest', 'other')):
                # A missing attribute is treated like an unparsable price.
                price = price_regex.search(price_div.get('data-' + attr, ''))
                if price:
                    prices[role] = price.group('price')
                elif attr == 'default':
                    # Without the default price the meal gets no prices.
                    prices = {}
                    break
            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
示例#2
0
def parse_url(url, today=False):
    """Fetch the menu page at *url* and return ``canteen.toXMLFeed()``.

    Each ``td`` whose text matches ``day_regex`` names a day; the rows of the
    enclosing table are read as that day's meals.  Rows mentioning
    ``geschlossen`` or ``Feiertage`` close either the single day or, when
    ``day_range_regex`` matches, every day of the given range.

    ``today`` is not used by this function.
    """
    content = urlopen(url).read()
    document = parse(content, 'lxml')

    # Map footnote numbers to their legend text.  Iterating in reverse means
    # that for duplicate numbers the first entry on the page wins.
    extraLegend = {}
    legend_divs = document.find_all('div', {'class': 'legende'})
    if len(legend_divs) > 0:
        for entry in reversed(legend_regex.findall(legend_divs[0].text)):
            extraLegend[int(entry[0])] = entry[1]

    canteen = LazyBuilder()
    one_day = datetime.timedelta(days=1)

    for day_td in document.find_all('td', text=day_regex):
        date = day_regex.search(day_td.string).group('date')

        # Closest enclosing <table>, if there is one.
        table = next(
            (parent for parent in day_td.parents if parent.name == 'table'),
            None)
        if table is None:
            continue

        for tr in table.tbody.find_all('tr'):
            row_text = tr.text
            if 'geschlossen' in row_text or 'Feiertage' in row_text:
                span = day_range_regex.search(row_text)
                if span is None:
                    canteen.setDayClosed(date)
                    continue
                current = datetime.datetime.strptime(
                    span.group('from'), '%d.%m.%Y')
                last = datetime.datetime.strptime(
                    span.group('to'), '%d.%m.%Y')
                while current <= last:
                    canteen.setDayClosed(current.strftime('%Y-%m-%d'))
                    current += one_day
                continue

            if len(tr) != 2:
                continue  # no meal

            strings = list(tr.contents[0].strings)
            name = strings[0]

            # prices: the last string of the first cell, separated by '|';
            # a lone '-' in any slot means "no prices at all".
            prices = strings[-1].split('|')
            if any(part.strip() == '-' for part in prices):
                prices = {}

            # notes: icon alt texts first, then resolved footnote numbers
            notes = [
                img['alt'].replace('Symbol', '').strip()
                for img in tr.contents[1].find_all('img')
            ]
            footnotes = set(map(int, extra_regex.findall(tr.text)))
            notes.extend(
                extraLegend[number] for number in footnotes
                if number in extraLegend)

            canteen.addMeal(date, 'Hauptgerichte', name, notes, prices,
                            roles if prices else None)
    return canteen.toXMLFeed()
示例#3
0
def parsePlan(url, internalMensaId, today):
    """Walk the day pages starting at *url* and return ``canteen.toXMLFeed()``.

    For every page the date is read from ``#mensa_date > p`` and the menu from
    the ``dd`` that shares a parent with the element whose id is
    *internalMensaId*.  A page whose menu holds exactly one table yields one
    meal per row with a name cell; any other page marks the day as closed.
    The ``next_day_link`` element is followed until it is missing, or only the
    first page is read when *today* is truthy.
    """
    canteen = LazyBuilder()
    price_roles = {'Stud.:': 'student', 'Bed.:': 'employee', 'Gast:': 'other'}

    while url is not None:
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        # Raw string: the pattern contains \w and \s, which are invalid
        # escape sequences in a normal string literal.
        legend = buildLegend(
            {}, str(dom),
            regex=r'<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)')

        if tables is not None and len(tables) == 1:
            for row in tables[0].find_all('tr'):
                # select() returns a (possibly empty) list, never None, so
                # rows without a name cell have to be skipped explicitly.
                name_cells = row.select('td[class="mensa_col_55"] > b')
                if not name_cells or not name_cells[0].contents:
                    continue
                menuName = name_cells[0].contents[0]
                category = 'Gericht'

                # get notes: space separated legend keys, resolved if known
                notes = []
                note_spans = row.select('td[class="mensa_col_55"] > span')
                if note_spans:
                    notes = [
                        legend.get(n, n)
                        for n in note_spans[0].text.split(' ') if n
                    ]

                # get prices: one cell per group, <span> = group, <b> = price
                prices = {}
                for td in row.select('td[class="mensa_col_15"]'):
                    priceElement = td.find('b')
                    groupElement = td.find('span')
                    if priceElement is None or groupElement is None:
                        continue
                    if not priceElement.contents or not groupElement.contents:
                        continue
                    role = price_roles.get(str(groupElement.contents[0]))
                    if role is not None:
                        prices[role] = str(priceElement.contents[0])

                canteen.addMeal(date, category, menuName, notes, prices)
        else:
            canteen.setDayClosed(date)

        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink is None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Fetch the menu page at *url* and return ``canteen.toXMLFeed()``.

    Every ``div`` carrying a ``data-day`` attribute is one day; each
    ``article.menu`` inside it is one meal.  A day containing a
    ``div.holiday`` is marked closed if no meal was added for it.

    ``today`` is accepted for interface compatibility; it is not used here.
    """
    canteen = LazyBuilder()
    document = parse(urlopen(url).read(), 'lxml')
    # One timestamp for the whole run so year and month always belong together.
    now = datetime.datetime.now()

    for day_div in document.find_all('div', attrs={'data-day': True}):
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date "{}"'.format(
                day_div['data-day']))
            continue

        # The page gives day and month only, so the year has to be inferred.
        # Only a gap of more than half a year is treated as a year change;
        # otherwise a date from last month (same week) would wrongly be
        # pushed into next year.
        month = int(date_test.group('month'))
        year = now.year
        if now.month - month > 6:
            year += 1  # e.g. January menu viewed in December
        elif month - now.month > 6:
            year -= 1  # e.g. December menu viewed in January
        date = '{}-{}-{}'.format(year, date_test.group('month'),
                                 date_test.group('day'))

        closed_candidate = day_div.find('div', 'holiday') is not None

        for meal_article in day_div.find_all('article', 'menu'):
            title_div = meal_article.find('div', 'title')
            name = title_div.text if title_div is not None else ''
            if not name:
                continue

            category = meal_article.find('div', 'icon')['title']
            notes = []
            prices = {}

            additives = meal_article.find('div', 'additnr')
            if additives is not None:
                notes += [
                    additive.text for additive in additives.find_all('li')
                ]
            # Icon titles are appended unless the additive list already
            # contains the same text.
            notes += [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title'] and v['title'] not in notes
            ]

            price_div = meal_article.find('div', 'price')
            if price_div is not None:
                for attr, role in price_map.items():
                    # A missing or empty attribute just leaves that role out.
                    price = price_div.get('data-' + attr)
                    if price:
                        prices[role] = price
            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)

    return canteen.toXMLFeed()
示例#5
0
def parse_url(url, today=False):
    """Fetch the menu page at *url* and return ``canteen.toXMLFeed()``.

    Each ``td`` whose text matches ``day_regex`` names a day; the rows of the
    enclosing table are read as that day's meals.  Rows mentioning
    ``geschlossen`` or ``Feiertage`` close either the single day or, when
    ``day_range_regex`` matches, every day of the given range.

    ``today`` is not used by this function.
    """
    content = urlopen(url).read()
    # NOTE(review): no parser is named here, so the tree (and therefore the
    # child count checked below) depends on which parser bs4 picks - confirm.
    document = parse(content)

    # Map footnote numbers to their legend text.  Iterating in reverse means
    # that for duplicate numbers the first entry on the page wins.
    legends = document.find_all('div', {'class': 'legende'})
    if len(legends) > 0:
        extraLegend = {
            int(v[0]): v[1]
            for v in reversed(legend_regex.findall(legends[0].text))
        }
    else:
        extraLegend = {}

    canteen = LazyBuilder()
    one_day = datetime.timedelta(days=1)

    for day_td in document.find_all('td', text=day_regex):
        date = day_regex.search(day_td.string).group('date')

        # Closest enclosing <table>, if there is one.
        table = next(
            (parent for parent in day_td.parents if parent.name == 'table'),
            None)
        if table is None:
            continue

        for tr in table.tbody.find_all('tr'):
            if 'geschlossen' in tr.text or 'Feiertage' in tr.text:
                match = day_range_regex.search(tr.text)
                if not match:
                    canteen.setDayClosed(date)
                else:
                    fromDate = datetime.datetime.strptime(
                        match.group('from'), '%d.%m.%Y')
                    toDate = datetime.datetime.strptime(
                        match.group('to'), '%d.%m.%Y')
                    while fromDate <= toDate:
                        canteen.setDayClosed(fromDate.strftime('%Y-%m-%d'))
                        fromDate += one_day
                continue

            if len(tr) != 3:
                continue  # no meal

            strings = list(tr.contents[0].strings)
            name = strings[0]

            # prices: the last string of the first cell, separated by '|';
            # a lone '-' in any slot means "no prices at all".
            # (The debug print of the raw prices was removed.)
            prices = strings[-1].split('|')
            if any(part.strip() == '-' for part in prices):
                prices = {}

            # notes: icon alt texts first, then resolved footnote numbers
            notes = [
                img['alt'].replace('Symbol', '').strip()
                for img in tr.contents[1].find_all('img')
            ]
            for extra in {int(v) for v in extra_regex.findall(tr.text)}:
                if extra in extraLegend:
                    notes.append(extraLegend[extra])

            canteen.addMeal(date, 'Hauptgerichte', name, notes, prices,
                            roles if prices else None)
    return canteen.toXMLFeed()
示例#6
0
def parse_url(url, today=False):
    """Fetch the menu page at *url* and return ``canteen.toXMLFeed()``.

    Every ``div.day`` and every ``article`` carrying a ``data-day`` attribute
    is handled as one day.  A day is marked closed when it has the ``nodata``
    class, when its text contains ``GESCHLOSSEN``, or when one of its meal
    titles contains ``geschlossen`` and no real meal was added for that date.
    The meal category is the text of the meal's ``div.desc``.

    ``today`` is accepted for interface compatibility; it is not used here.
    """
    canteen = LazyBuilder()
    document = parse(urlopen(url).read())
    # One timestamp for the whole run so year and month always belong together.
    now = datetime.datetime.now()
    day_nodes = document.find_all('div', 'day') + document.find_all(
        'article', attrs={'data-day': True})
    for day_div in day_nodes:
        # ``div.day`` elements are matched by class only, so ``data-day`` may
        # be absent; an empty string simply fails the regex below.
        date_test = day_regex.search(day_div.get('data-day', ''))
        if not date_test:
            print('Error: unable to parse date')
            continue

        # The page gives day and month only, so the year has to be inferred.
        # Only a gap of more than half a year is treated as a year change;
        # otherwise a date from last month (same week) would wrongly be
        # pushed into next year.
        month = int(date_test.group('month'))
        year = now.year
        if now.month - month > 6:
            year += 1  # e.g. January menu viewed in December
        elif month - now.month > 6:
            year -= 1  # e.g. December menu viewed in January
        date = "{}-{}-{}".format(year, date_test.group('month'),
                                 date_test.group('day'))

        if ('nodata' in day_div.attrs.get('class', [])
                or 'GESCHLOSSEN' in day_div.text):
            canteen.setDayClosed(date)
            continue

        closed_candidate = False
        for meal_article in day_div.find_all('article', 'menu'):
            title_div = meal_article.find('div', 'title')
            name = title_div.text if title_div is not None else ''
            if not name:
                continue
            if 'geschlossen' in name:
                # Only a hint: the day is closed if no other meal shows up.
                closed_candidate = True
                continue

            category = meal_article.find('div', 'desc').text
            notes = [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title']
            ]
            additive_div = meal_article.find('div', 'additive')
            if additive_div is not None:
                notes += [
                    v[0] for v in extra_regex.findall(additive_div.text)
                ]

            price_div = meal_article.find('div', 'price')
            if price_div is None:
                canteen.addMeal(date, category, name, notes)
                continue

            prices = {}
            for attr, role in (('default', 'student'), ('bed', 'employee'),
                               ('guest', 'other')):
                # A missing attribute is treated like an unparsable price.
                price = price_regex.search(price_div.get('data-' + attr, ''))
                if price:
                    prices[role] = price.group('price')
                elif attr == 'default':
                    # Without the default price the meal gets no prices.
                    prices = {}
                    break
            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)
    return canteen.toXMLFeed()
示例#7
0
def parse_url(url, today=False):
    """Fetch the menu page at *url* and return ``canteen.toXMLFeed()``.

    Every ``div`` carrying a ``data-day`` attribute is one day; each
    ``article.menu`` inside it is one meal.  A day containing a
    ``div.holiday`` is marked closed if no meal was added for it.

    ``today`` is accepted for interface compatibility; it is not used here.
    """
    canteen = LazyBuilder()
    document = parse(urlopen(url).read(), 'lxml')
    # One timestamp for the whole run so year and month always belong together.
    now = datetime.datetime.now()

    for day_div in document.find_all('div', attrs={'data-day': True}):
        date_test = day_regex.search(day_div['data-day'])
        if not date_test:
            print('Error: unable to parse date "{}"'.format(
                day_div['data-day']))
            continue

        # The page gives day and month only, so the year has to be inferred.
        # Only a gap of more than half a year is treated as a year change;
        # otherwise a date from last month (same week) would wrongly be
        # pushed into next year.
        month = int(date_test.group('month'))
        year = now.year
        if now.month - month > 6:
            year += 1  # e.g. January menu viewed in December
        elif month - now.month > 6:
            year -= 1  # e.g. December menu viewed in January
        date = '{}-{}-{}'.format(year, date_test.group('month'),
                                 date_test.group('day'))

        closed_candidate = day_div.find('div', 'holiday') is not None

        for meal_article in day_div.find_all('article', 'menu'):
            title_div = meal_article.find('div', 'title')
            name = title_div.text if title_div is not None else ''
            if not name:
                continue

            category = meal_article.find('div', 'icon')['title']
            notes = []
            prices = {}

            additives = meal_article.find('div', 'additnr')
            if additives is not None:
                notes += [
                    additive.text for additive in additives.find_all('li')
                ]
            # Icon titles are appended unless the additive list already
            # contains the same text.
            notes += [
                v['title'] for v in meal_article.find_all('div', 'theicon')
                if v['title'] and v['title'] not in notes
            ]

            price_div = meal_article.find('div', 'price')
            if price_div is not None:
                for attr, role in price_map.items():
                    # A missing or empty attribute just leaves that role out.
                    price = price_div.get('data-' + attr)
                    if price:
                        prices[role] = price
            canteen.addMeal(date, category, name, notes, prices)

        if closed_candidate and not canteen.hasMealsFor(date):
            canteen.setDayClosed(date)

    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse the first table of the page at *url* into ``canteen.toXMLFeed()``.

    The table is read row by row as a small state machine: a row whose first
    cell matches ``date_regex`` opens a new day, the following rows are that
    day's meals, and a row consisting only of empty cells ends the day.  A day
    row without prices marks the day closed, unless it mentions
    ``Spargeltage``.  Rows that cannot be parsed are reported with a traceback
    and skipped; they never abort the feed.

    ``today`` is accepted for interface compatibility; it is not used here.
    """
    content = urlopen(url).read()
    document = parse(content, "lxml")
    canteen = LazyBuilder()
    table = document.find_all('table')[0]

    # Footnote -> property of the dish ("Gericht ist ...").
    food_is_notes = {
        '1': 'mit Farbstoffen',
        '4': 'geschwärzt',
        '5': 'mit Süßungsmittel',
        '7': 'mit Antioxidationsmittel',
        '8': 'mit Geschmacksverstärker',
        '9': 'geschwefelt',
        '10': 'geschwärzt',
        '11': 'gewachst',
        '12': 'mit Phosphat',
    }
    # Footnote -> allergen contained in the dish ("Gericht enthält ...").
    food_contains_notes = {
        'a1': 'Gluten',
        'a2': 'Krebstiere',
        'a3': 'Eier',
        'a4': 'Fisch',
        'a5': 'Erdnüsse',
        'a6': 'Soja',
        'a7': 'Milch/Laktose',
        'a8': 'Schalenfrüchte',
        'a9': 'Sellerie',
        'a10': 'Senf',
        'a11': 'Sesam',
        'a12': 'Schwefeldioxid/Sulfite',
        'a13': 'Lupinen',
        'a14': 'Weichtiere',
    }
    # Letter in the type cell -> category word, in output order.
    category_letters = (
        ('R', 'Rind'),
        ('S', 'Schwein'),
        ('G', 'Geflügel'),
        ('V', 'Vegetarisch'),
        ('F', 'Fisch'),
        ('L', 'Lamm'),
        ('W', 'Wild'),
    )

    def is_new_entry(tds):
        """True if the first cell of the row holds a date."""
        # Rows without any <td> (e.g. header rows) can never open a day.
        if not tds:
            return False
        text = tds[0].string
        return text is not None and date_regex.search(text) is not None

    def is_end_of_entry(tds):
        """True if every cell of the row is a blank string."""
        return all(
            td.string is not None and td.string.strip() == '' for td in tds)

    def is_action_entry(td):
        return td.text == 'Aktion'

    def is_closed(tds):
        return is_new_entry(tds) and get_pricing(tds, 4, 7) is None

    def refactor_date(raw_date):
        """Return ``dd.mm.yyyy`` for a day/month string, inferring the year."""
        now = datetime.datetime.now()
        match = date_regex.search(raw_date)
        day = match.group('day')
        month = match.group('month')
        year = now.year
        if month == '01' and now.month == 12:
            # the list covers this year and the next one
            year += 1
        elif month == '12' and now.month == 1:
            # the list covers this year and the previous one
            year -= 1
        return day + '.' + month + '.' + str(year)

    def parse_foot_type(td):
        """Return the category for a type cell, or None if it has none."""
        label = td.string
        if label is None:
            # No plain text: the category is encoded in an icon.
            img = td.find('img')
            if img is None:
                return None
            src = img.get('src') or ''
            if 'msc' in src:
                return 'Fish MSC'
            if 'vegan' in src:
                return 'Vegan'
            return ''
        # Uncategorised food is possible; it needs some category because an
        # empty tag is reported to make openmensa.org fail.
        if label.strip() == '':
            return 'Tipp'
        return ' '.join(
            word for letter, word in category_letters if letter in label)

    def get_refs(td):
        return td.find_all('sup')

    def get_foot_description(td):
        """Return the cell text with the footnote markers removed."""
        description = td.text
        for ref in get_refs(td):
            description = description.replace(' ' + ref.text, '', 1)
        if description.startswith(' '):
            description = description[1:]
        return description

    def get_notes(td):
        """Return the distinct footnote keys of the cell (unordered)."""
        keys = []
        for ref in get_refs(td):
            keys.extend(ref.string.split(','))
        return list(set(keys))

    def build_notes_string(td):
        """Return one German sentence describing the cell's footnotes."""
        food_is = []
        food_contains = []
        for key in get_notes(td):
            if key in food_is_notes:
                food_is.append(food_is_notes[key])
            elif key in food_contains_notes:
                food_contains.append(food_contains_notes[key])
            else:
                food_contains.append('undefinierte Chemikalien:' + key)
        notes = ''
        if food_is:
            notes += 'Gericht ist ' + ', '.join(food_is) + ', '
        if food_contains:
            notes += 'und enthält ' if food_is else 'Gericht enthält '
            notes += ', '.join(food_contains) + ', '
        if notes:
            # Replace the trailing ", " by a full stop.
            notes = notes[:-2] + '.'
        return notes

    def get_pricing(tds, f, t):
        """Return the prices of cells ``f`` to ``t - 1``, or None if any is blank."""
        # Fewer than 7 cells: this might be a special day.
        if len(tds) < 7:
            return None
        priceing = []
        for cell in tds[f:t]:
            raw_price = cell.string.strip()
            if raw_price == '':
                return None
            priceing.append(price_regex.search(raw_price).group('val'))
        return priceing

    # state helper
    inside_valide_entry = False
    date = ''

    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if is_new_entry(tds):
            try:
                date = refactor_date(tds[0].string)
                if is_closed(tds):
                    # A canteen may look closed although it is "Spargeltage".
                    if "Spargeltage" in tds[3].text:
                        canteen.addMeal(date, "Spargel", "Spargel Tag",
                                        ["Spargel und andere Gerichte."], None,
                                        None)
                    else:
                        canteen.setDayClosed(date)
                else:
                    inside_valide_entry = True
            except Exception:
                # Best effort: report the broken row and keep going.
                traceback.print_exc()
        if is_end_of_entry(tds):
            inside_valide_entry = False
        elif inside_valide_entry:
            try:
                # "Aktion" rows are shifted one cell to the left.
                if is_action_entry(tds[0]):
                    type_td, text_td, first_price = tds[1], tds[2], 3
                    prefix = 'Aktion: '
                else:
                    type_td, text_td, first_price = tds[2], tds[3], 4
                    prefix = ''
                food_type = parse_foot_type(type_td)
                food_description = get_foot_description(text_td)
                # Rows without a category or without a name are not meals.
                if food_type is None or not food_description:
                    continue
                notes_string = build_notes_string(text_td)
                notes = [notes_string] if notes_string != "" else []
                prices = get_pricing(tds, first_price, first_price + 3)
                canteen.addMeal(date, prefix + food_type, food_description,
                                notes, prices, roles if prices else None)
            except Exception:
                # Best effort: report the broken row and keep going.
                traceback.print_exc()

    return canteen.toXMLFeed()
def parse_url(url, today=False):
    """Parse the first table of the page at *url* into ``canteen.toXMLFeed()``.

    The table is read row by row as a small state machine: a row whose first
    cell matches ``date_regex`` opens a new day, the following rows are that
    day's meals, and a row consisting only of empty cells ends the day.  A day
    row without prices marks the day closed, unless it mentions
    ``Spargeltage``.  Rows that cannot be parsed are reported with a traceback
    and skipped; they never abort the feed.

    ``today`` is accepted for interface compatibility; it is not used here.
    """
    content = urlopen(url).read()
    document = parse(content, "lxml")
    canteen = LazyBuilder()
    table = document.find_all('table')[0]

    # Footnote -> property of the dish ("Gericht ist ...").
    food_is_notes = {
        '1': 'mit Farbstoffen',
        '4': 'geschwärzt',
        '5': 'mit Süßungsmittel',
        '7': 'mit Antioxidationsmittel',
        '8': 'mit Geschmacksverstärker',
        '9': 'geschwefelt',
        '10': 'geschwärzt',
        '11': 'gewachst',
        '12': 'mit Phosphat',
    }
    # Footnote -> allergen contained in the dish ("Gericht enthält ...").
    food_contains_notes = {
        'a1': 'Gluten',
        'a2': 'Krebstiere',
        'a3': 'Eier',
        'a4': 'Fisch',
        'a5': 'Erdnüsse',
        'a6': 'Soja',
        'a7': 'Milch/Laktose',
        'a8': 'Schalenfrüchte',
        'a9': 'Sellerie',
        'a10': 'Senf',
        'a11': 'Sesam',
        'a12': 'Schwefeldioxid/Sulfite',
        'a13': 'Lupinen',
        'a14': 'Weichtiere',
    }
    # Letter in the type cell -> category word, in output order.
    category_letters = (
        ('R', 'Rind'),
        ('S', 'Schwein'),
        ('G', 'Geflügel'),
        ('V', 'Vegetarisch'),
        ('F', 'Fisch'),
        ('L', 'Lamm'),
        ('W', 'Wild'),
    )

    def is_new_entry(tds):
        """True if the first cell of the row holds a date."""
        # Rows without any <td> (e.g. header rows) can never open a day.
        if not tds:
            return False
        text = tds[0].string
        return text is not None and date_regex.search(text) is not None

    def is_end_of_entry(tds):
        """True if every cell of the row is a blank string."""
        return all(
            td.string is not None and td.string.strip() == '' for td in tds)

    def is_action_entry(td):
        return td.text == 'Aktion'

    def is_closed(tds):
        return is_new_entry(tds) and get_pricing(tds, 4, 7) is None

    def refactor_date(raw_date):
        """Return ``dd.mm.yyyy`` for a day/month string, inferring the year."""
        now = datetime.datetime.now()
        match = date_regex.search(raw_date)
        day = match.group('day')
        month = match.group('month')
        year = now.year
        if month == '01' and now.month == 12:
            # the list covers this year and the next one
            year += 1
        elif month == '12' and now.month == 1:
            # the list covers this year and the previous one
            year -= 1
        return day + '.' + month + '.' + str(year)

    def parse_foot_type(td):
        """Return the category for a type cell, or None if it has none."""
        label = td.string
        if label is None:
            # No plain text: the category is encoded in an icon.
            img = td.find('img')
            if img is None:
                return None
            src = img.get('src') or ''
            if 'msc' in src:
                return 'Fish MSC'
            if 'vegan' in src:
                return 'Vegan'
            return ''
        # Uncategorised food is possible; it needs some category because an
        # empty tag is reported to make openmensa.org fail.
        if label.strip() == '':
            return 'Tipp'
        return ' '.join(
            word for letter, word in category_letters if letter in label)

    def get_refs(td):
        return td.find_all('sup')

    def get_foot_description(td):
        """Return the cell text with the footnote markers removed."""
        description = td.text
        for ref in get_refs(td):
            description = description.replace(' ' + ref.text, '', 1)
        if description.startswith(' '):
            description = description[1:]
        return description

    def get_notes(td):
        """Return the distinct footnote keys of the cell (unordered)."""
        keys = []
        for ref in get_refs(td):
            keys.extend(ref.string.split(','))
        return list(set(keys))

    def build_notes_string(td):
        """Return one German sentence describing the cell's footnotes."""
        food_is = []
        food_contains = []
        for key in get_notes(td):
            if key in food_is_notes:
                food_is.append(food_is_notes[key])
            elif key in food_contains_notes:
                food_contains.append(food_contains_notes[key])
            else:
                food_contains.append('undefinierte Chemikalien:' + key)
        notes = ''
        if food_is:
            notes += 'Gericht ist ' + ', '.join(food_is) + ', '
        if food_contains:
            notes += 'und enthält ' if food_is else 'Gericht enthält '
            notes += ', '.join(food_contains) + ', '
        if notes:
            # Replace the trailing ", " by a full stop.
            notes = notes[:-2] + '.'
        return notes

    def get_pricing(tds, f, t):
        """Return the prices of cells ``f`` to ``t - 1``, or None if any is blank."""
        # Fewer than 7 cells: this might be a special day.
        if len(tds) < 7:
            return None
        priceing = []
        for cell in tds[f:t]:
            raw_price = cell.string.strip()
            if raw_price == '':
                return None
            priceing.append(price_regex.search(raw_price).group('val'))
        return priceing

    # state helper
    inside_valide_entry = False
    date = ''

    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if is_new_entry(tds):
            try:
                date = refactor_date(tds[0].string)
                if is_closed(tds):
                    # A canteen may look closed although it is "Spargeltage".
                    if "Spargeltage" in tds[3].text:
                        canteen.addMeal(date, "Spargel", "Spargel Tag",
                                        ["Spargel und andere Gerichte."], None,
                                        None)
                    else:
                        canteen.setDayClosed(date)
                else:
                    inside_valide_entry = True
            except Exception:
                # Best effort: report the broken row and keep going.
                traceback.print_exc()
        if is_end_of_entry(tds):
            inside_valide_entry = False
        elif inside_valide_entry:
            try:
                # "Aktion" rows are shifted one cell to the left.
                if is_action_entry(tds[0]):
                    type_td, text_td, first_price = tds[1], tds[2], 3
                    prefix = 'Aktion: '
                else:
                    type_td, text_td, first_price = tds[2], tds[3], 4
                    prefix = ''
                food_type = parse_foot_type(type_td)
                food_description = get_foot_description(text_td)
                # Rows without a category or without a name are not meals;
                # this also keeps 'Aktion: ' from being joined with None.
                if food_type is None or not food_description:
                    continue
                notes_string = build_notes_string(text_td)
                notes = [notes_string] if notes_string != "" else []
                prices = get_pricing(tds, first_price, first_price + 3)
                canteen.addMeal(date, prefix + food_type, food_description,
                                notes, prices, roles if prices else None)
            except Exception:
                # Best effort: report the broken row and keep going.
                traceback.print_exc()

    return canteen.toXMLFeed()
示例#10
0
文件: __init__.py 项目: cvzi/mensahd
def parse_url(url, today=False):
    """Scrape a weekly menu page and return the feed built from it.

    The page is downloaded with ``requests``, parsed with BeautifulSoup and
    every table row is turned into a meal (or a closed day) on a
    ``LazyBuilder``.

    Args:
        url: Address of the menu page. If it contains ``%s`` the placeholder
            is replaced with the reference date formatted as ``YYYY_MM_DD``.
        today: Ignored. The name is immediately rebound to the current date
            (or the following Monday when run on a Sunday).

    Returns:
        Whatever ``LazyBuilder.toXMLFeed()`` returns for the collected data.

    Raises:
        AttributeError: If the page has no ``div.maincontent`` / ``h2`` or the
            heading does not match ``datespan_regex``.
        ValueError: If a day name is not in ``daysGerman`` or a date string
            cannot be parsed.
    """
    today = datetime.date.today()
    if today.weekday() == 6:  # Sunday
        today += datetime.timedelta(days=1)  # Tomorrow

    if "%s" in url:
        url = url % today.strftime('%Y_%m_%d')

    try:
        content = requests.get(url).text
    except requests.exceptions.ConnectionError as e:
        logging.warning(str(e))
        # NOTE(security): the retry disables TLS certificate verification, so
        # the downloaded page is not authenticated. Kept for compatibility.
        content = requests.get(url, verify=False).text

    document = BeautifulSoup(content, "html.parser")
    canteen = LazyBuilder()

    # Prices for employees and guests.
    # The multipliers are only used when no fixed prices could be read from
    # the page; binding them up front keeps them defined on every path.
    employee_multiplier = 1.25
    guest_multiplier = 1.60
    try:
        main_text = document.find("main").text

        p = price_employee_regex.search(main_text).groupdict()
        parts = p["employee"].split(",")
        employee = float(parts[0]) + float(parts[1]) / 100

        # NOTE(review): the guest match is also read through a group named
        # "employee" - confirm that price_guest_regex names its group so.
        p = price_guest_regex.search(main_text).groupdict()
        parts = p["employee"].split(",")
        guest = float(parts[0]) + float(parts[1]) / 100
    except (AttributeError, TypeError, KeyError, ValueError):
        # Either both fixed prices are known or neither is used.
        employee = None
        guest = None

    # Date span of the plan, taken from the heading of the main content
    maincontent = document.find("div", {"class": "maincontent"})
    p = datespan_regex.search(maincontent.find("h2").text).groupdict()

    # The start date may be written without a year; borrow it from the end.
    if not p["from"].split(".")[2]:
        p["from"] += p["to"].split(".")[2]
    fromdate = datetime.datetime.strptime(p["from"], "%d.%m.%Y")

    table = maincontent.find("table")
    if not table:
        # Die Speisenausgabe DHBW Eppelheim ist vom dd.mm.yyyy – dd.mm.yyyy
        # geschlossen
        # (i.e. the page announces a closing period instead of a menu table)
        p = datespan_regex.search(maincontent.text)
        if p:
            fromdate = datetime.datetime.strptime(p["from"], "%d.%m.%Y")
            todate = datetime.datetime.strptime(p["to"], "%d.%m.%Y")
            while fromdate <= todate:
                canteen.setDayClosed(fromdate.strftime('%d.%m.%Y'))
                fromdate += datetime.timedelta(1)

        return canteen.toXMLFeed()

    date = None
    for tr in table.find_all("tr"):

        tds = tr.find_all("td")

        if len(tds) == 4:
            # Row opens a new day: the first cell holds the German day name.
            td0, td1, td2, td3 = tds

            day = td0.text.strip()

            date = fromdate + datetime.timedelta(days=daysGerman.index(day))
            date = date.strftime('%d.%m.%Y')

        elif len(tds) == 3:
            # Further meal of the day announced by an earlier row.
            td1, td2, td3 = tds

        else:
            # Not a meal row (e.g. a header row built from <th> cells);
            # unpacking it would raise ValueError.
            continue

        notes = []

        category_text = td1.text
        if "feiertag" in category_text.lower() or \
                "geschlossen" in category_text.lower():
            canteen.setDayClosed(date)
            continue

        # Drop the last character of the category cell
        # (presumably a trailing colon - verify against the page).
        categoryName = category_text.strip()[:-1]
        mealName = td2.text.strip()

        if not categoryName or not mealName:
            continue

        prices = []
        try:
            price = float(euro_regex.search(
                td3.text).group(1).replace(",", "."))
            prices.append(price)
            if employee is not None:
                prices.append(employee)
            else:
                prices.append(price * employee_multiplier)
            if guest is not None:
                prices.append(guest)
            else:
                prices.append(price * guest_multiplier)
        except (AttributeError, TypeError, KeyError, ValueError):
            # No parsable price: keep the raw cell text as a note instead.
            notes.append(td3.text.strip())

        notes = [x for x in notes if x]
        canteen.addMeal(date, categoryName, mealName, notes if notes else None,
                        prices if prices else None, roles if prices else None)

    return canteen.toXMLFeed()
示例#11
0
文件: __init__.py 项目: cvzi/mensahd
def parse_url(url, today=False):
    """Scrape a menu preview table and return the feed built from it.

    The table with id ``previewTable`` is read row by row: the first row
    supplies the category names, after that rows are consumed in pairs of a
    meal row followed by its price row.

    Args:
        url: Address template; the reference date formatted as ``YYYY_MM_DD``
            is interpolated into it with the ``%`` operator.
        today: Ignored. The name is immediately rebound to the current date
            (or the following Monday when run on a Sunday).

    Returns:
        Whatever ``LazyBuilder.toXMLFeed()`` returns for the collected data.

    Raises:
        RuntimeError: If the resulting URL is not an http(s) URL.
    """
    today = datetime.date.today()
    if today.weekday() == 6:  # Sunday
        today += datetime.timedelta(days=1)  # Tomorrow

    url = url % today.strftime('%Y_%m_%d')

    if not url.startswith(("http://", "https://")):
        raise RuntimeError("url is not an allowed URL: '%s'" % url)
    try:
        content = requests.get(url).text
    except requests.exceptions.ConnectionError as e:
        logging.warning(e)
        # NOTE(security): the retry disables TLS certificate verification, so
        # the downloaded page is not authenticated. Kept for compatibility.
        content = requests.get(url, verify=False).text

    document = BeautifulSoup(content, "html.parser")
    canteen = LazyBuilder()

    # Prices for employees and guests: percentage surcharges read from the
    # page, with fixed fallbacks when they cannot be found.
    try:
        p = price_regex.search(document.find(
            "p", {"id": "message"}).text).groupdict()
        employee_multiplier = 1.0 + int(p["employee"]) / 100.0
        guest_multiplier = 1.0 + int(p["guest"]) / 100.0
    except (AttributeError, TypeError, KeyError, ValueError):
        employee_multiplier = 1.25
        guest_multiplier = 1.60

    trs = document.find("table", {"id": "previewTable"}).find_all("tr")

    canteenCategories = []

    firstTr = True
    previous = None   # previous tr row
    for tr in trs:
        closed = False
        mealsFound = False
        if firstTr:
            # First table row contains the names of the different categories
            firstTr = False

            for th in tr.find_all("th")[1:]:
                canteenCategories.append(th.text.strip())

        elif previous is None:
            # Normal table row containing meal information
            previous = tr

        else:
            # Price table row
            mealCells = previous.find_all("td")
            date = day_regex.search(previous.find("td", {"class": "first"})[
                                    "data-date"]).group('date')

            if "geschlossen" == mealCells[1].text.strip():
                closed = date

            # The first cell of the meal row holds the date, so meal cell
            # i + 1 lines up with price cell i and category i.
            for cat, (td0, td1) in enumerate(
                    zip(mealCells[1:], tr.find_all("td"))):
                if "heute kein Angebot" in td0.text or "geschlossen" in td0.text:
                    continue

                notes = []

                # Category
                if td0.find("h2"):
                    categoryName = canteenCategories[cat] + " " + \
                        correctCapitalization(td0.find("h2").text.strip())
                else:
                    categoryName = canteenCategories[cat]

                if "Kubusangebote am Themenpark" in td0.text:
                    canteen.addMeal(date, categoryName,
                                    "Kubusangebote am Themenpark", [])
                    continue

                # Name
                if td0.find("p"):
                    name = removeextras_regex.sub("", td0.find("p").text)
                else:
                    name = categoryName  # No name available, let's just use the category name

                # Prices
                prices = []
                spans = td1.find_all("span", {"class": "label"})
                if spans:
                    try:
                        price = float(euro_regex.search(
                            spans[0].text).group(1).replace(",", "."))
                    except (AttributeError, TypeError, KeyError, ValueError):
                        notes.append(spans[0].text.strip() + " Preis")
                    else:
                        # Only build the price tuple when parsing succeeded;
                        # otherwise `price` would be unbound or still hold
                        # the previous meal's value.
                        prices = (price, price * employee_multiplier,
                                  price * guest_multiplier)
                    if len(spans) == 2:
                        notes.append(spans[1].text.strip() + " Preis")

                # Notes: vegan, vegetarisch, ...
                notes += [icon["title"]
                          for icon in td1.find_all("span", {"class": "icon"})]

                canteen.addMeal(date, categoryName, name,
                                notes, prices, roles if prices else None)

                mealsFound = True

            previous = None
        # Only report a closed day when the row pair yielded no meals at all.
        if not mealsFound and closed:
            canteen.setDayClosed(closed)

    return canteen.toXMLFeed()
示例#12
0
def parsePlan(url, internalMensaId, today):
    """Walk the daily menu pages starting at *url* and build a feed.

    Each page is parsed for the menu belonging to the element with id
    *internalMensaId*; the "next day" link is followed until there is none.

    Args:
        url: Address of the first page to fetch.
        internalMensaId: ``id`` attribute of the element that marks the
            wanted canteen; its parent's first ``dd`` holds the menu table.
        today: When truthy, only the first page is processed.

    Returns:
        Whatever ``LazyBuilder.toXMLFeed()`` returns for the collected data.
    """
    # Maps the label shown next to a price to the role key used for addMeal.
    role_by_label = {
        'Stud.:': 'student',
        'Bed.:': 'employee',
        'Gast:': 'other',
    }

    canteen = LazyBuilder()
    while url is not None:
        dom = BeautifulSoup(urlopen(url).read(), 'lxml')
        date = dom.select('#mensa_date > p')[0].contents[0]
        menuDefinition = dom.find(id=internalMensaId)
        menuDescription = menuDefinition.parent.find('dd')
        tables = menuDescription.select('table')
        # Raw string: the pattern contains \w and \s, which are invalid
        # escape sequences in a normal string literal.
        legend = buildLegend(
            {},
            str(dom),
            regex=r'<strong>(?P<name>\w+)\s*</strong>\s*-\s*(?P<value>[\w\s)(]+)'
        )
        if tables is not None and len(tables) == 1:
            for row in tables[0].find_all('tr'):
                menuNameElement = row.select('td[class="mensa_col_55"] > b')
                # select() returns a (possibly empty) list, never None, so
                # rows without a name cell must be skipped before indexing.
                if not menuNameElement or not menuNameElement[0].contents:
                    continue
                menuName = menuNameElement[0].contents[0]
                category = 'Gericht'

                # get notes: space separated keys, translated via the legend
                # where a translation exists
                notes = []
                notesElement = row.select('td[class="mensa_col_55"] > span')
                if notesElement:
                    notes = [
                        legend.get(n, n)
                        for n in notesElement[0].text.split(' ') if n
                    ]

                # get prices
                prices = {}
                for td in row.select('td[class="mensa_col_15"]'):
                    priceElement = td.find('b')
                    groupElement = td.find('span')
                    if priceElement is None or groupElement is None:
                        continue
                    if not groupElement.contents or not priceElement.contents:
                        continue
                    role = role_by_label.get(str(groupElement.contents[0]))
                    if role is not None:
                        prices[role] = str(priceElement.contents[0])

                canteen.addMeal(date, category, menuName, notes, prices)
        else:
            # No single menu table on the page: treat the day as closed.
            canteen.setDayClosed(date)

        # check for further pages
        nextPageLink = dom.find(id='next_day_link')
        if nextPageLink is None or today:
            url = None
        else:
            url = 'https://www.studentenwerk-rostock.de/' + nextPageLink['href']
    return canteen.toXMLFeed()