示例#1
0
 def feed_today(self, name):
     """Return the feed holding only today's meals of canteen *name*.

     If *name* is not a key of ``self.canteens`` the plain string
     'Wrong mensa name' is returned instead of a feed.
     """
     if name not in self.canteens:
         return 'Wrong mensa name'

     currentDay = nowBerlin().date()
     builder = StyledLazyBuilder()
     # The canteen entry stores the id that _parseMealsUrl looks for.
     _parseMealsUrl(builder, self.canteens[name]["id"], currentDay)
     return builder.toXMLFeed()
示例#2
0
    def feed_all(self, name):
        """Return the feed with today's meals plus the following weekdays.

        Today is fetched synchronously. Only if that fetch reports success
        are further days requested, each in its own thread: first the
        remaining weekdays of the current week, then weekdays of the next
        week until five days have been counted in total.

        If *name* is not a key of ``self.canteens`` the plain string
        'Wrong mensa name' is returned instead of a feed.
        """
        startTime = time.time()
        if name not in self.canteens:
            return 'Wrong mensa name'

        mensaId = self.canteens[name]["id"]
        lazyBuilder = StyledLazyBuilder()
        cursor = nowBerlin()
        oneDay = datetime.timedelta(days=1)

        if _parseMealsUrl(lazyBuilder, mensaId, cursor.date()):
            workers = []
            dayCount = 1  # today has already been fetched

            def fetchInBackground(day):
                # One thread per day; all threads share the same builder.
                worker = Thread(target=_parseMealsUrl,
                                args=(lazyBuilder, mensaId, day))
                worker.start()
                workers.append(worker)

            # Rest of the current week (Monday=0 ... Friday=4)
            cursor += oneDay
            while cursor.weekday() < 5:
                fetchInBackground(cursor.date())
                cursor += oneDay
                dayCount += 1

            # Jump from the weekend to the following Monday
            cursor += datetime.timedelta(days=7 - cursor.weekday())

            # Next week, limited to five days overall
            while cursor.weekday() < 5 and dayCount < 5:
                fetchInBackground(cursor.date())
                cursor += oneDay
                dayCount += 1

            for worker in workers:
                worker.join()

        endTime = time.time()
        logging.debug(
            f"feed_all({name}) took {endTime - startTime:.2f} seconds")

        return lazyBuilder.toXMLFeed()
示例#3
0
文件: __init__.py 项目: cvzi/mensa
    def parseHorizontalDates(document, lazyBuilder, legend):
        """Parse menu tables that list the dates in their first header row.

        Every ``table.std thead`` is handled on its own: the first tag child
        supplies the dates (one per column), every further tag child is a
        category row whose ``td.zelle_inhalt`` cells hold the meals.

        :param document: parsed page offering ``select()``
        :param lazyBuilder: builder receiving the legend and the meals
        :param legend: mapping used to expand additive codes into notes
        :return: None
        """
        lazyBuilder.setLegendData(
            legend)  # Useless, because the legends are usually incomplete
        headers = document.select('table.std thead')
        if not headers:
            logging.warning("No tables found")
            return

        for thead in headers:
            dates = []
            now = nowBerlin()
            for headerCell in thead.tr.select('td'):
                match = datePattern.search(headerCell.text.strip())
                if not match:
                    continue
                dateText = match[0]
                pieces = dateText.split('.')
                if pieces[-1] == '':
                    # No year given: a month before the current one is taken
                    # to belong to the next year.
                    if int(pieces[-2]) < now.month:
                        dateText += str(now.year + 1)
                    else:
                        dateText += str(now.year)
                dates.append(dateText)

            tagRows = [
                child for child in thead.children
                if isinstance(child, bs4.element.Tag)
            ]
            # The first tag row only carries the dates, skip it
            for row in tagRows[1:]:
                category = row.td.text.strip()

                for column, td in enumerate(row.select('td.zelle_inhalt')):
                    # Looked up before any skip, exactly one date per cell
                    date = dates[column]

                    if not td.a:
                        continue
                    notes = ["fleischlos"] if "gruen" in td.a["class"] else []

                    anchor = td.select('a')[0]
                    additives = []
                    for img in anchor.select('.additive img[alt]'):
                        alt = img.attrs["alt"].strip()
                        if alt:
                            additives.append(alt)
                    for span in td.select('div[style*="font-size:10px"] span'):
                        label = span.text.strip()
                        if label not in additives:
                            additives.append(label)
                        # Emptied so the text does not end up in the meal name
                        span.clear()

                    notes.extend(
                        legend.get(code, code) for code in additives)

                    mealName = " ".join(
                        piece.strip(" ,").strip() for piece in anchor.strings)

                    price = sum(
                        float(found[0].replace(',', '.'))
                        for found in pricePattern.findall(mealName))

                    if price > 0:
                        prices, roles = [price], ['student']
                    else:
                        prices, roles = [], []

                    if not mealName:
                        continue

                    # Long names are split; only the first part carries
                    # notes, prices and roles.
                    for part, productName in enumerate(
                            textwrap.wrap(mealName, width=250)):
                        isFirst = part == 0
                        lazyBuilder.addMeal(date, category, productName,
                                            notes if isFirst else None,
                                            prices if isFirst else None,
                                            roles if isFirst else None)
示例#4
0
文件: __init__.py 项目: cvzi/mensa
    def feed(self, refName):
        """Build the feed for the canteen registered as *refName*.

        The weekly menu page is downloaded, the bare day numbers of its day
        bar are mapped to calendar dates and every dish found in the weekly
        tables is added to the feed.

        :param refName: key of the canteen in ``self.canteens``
        :return: the result of the builder's ``toXMLFeed()``, or an error
                 message string if *refName* is not a known canteen
        """
        if refName not in self.canteens:
            return f"Unkown canteen '{refName}'"
        canteen = self.canteens[refName]
        path = canteen["source"]
        domain = canteen["domain"]
        pasto = canteen.get("pasto", None)

        today = nowBerlin()

        if "{timestamp}" in path:
            # On Sundays the timestamp of the following day is requested
            # (presumably to get the upcoming week - confirm with the site).
            if today.weekday() == 6:
                ts = today + datetime.timedelta(days=1)
            else:
                ts = today

            path = path.format(timestamp=int(ts.timestamp()))
        if "change_language" in canteen:
            lang = canteen["change_language"]
            # The menu path is only sent as Referer of the language switch
            # request; the body of that response is what gets parsed.
            html = requests.get(f"https://{domain}/change_language/{lang}",
                                headers={
                                    "Referer": f"https://{domain}{path}"
                                }).text
        else:
            html = requests.get(f"https://{domain}{path}").text

        lazyBuilder = StyledLazyBuilder()
        document = BeautifulSoup(html, "html.parser")

        # Log name
        logging.debug(f"\tReference: {refName}")
        for selected in document.select(
                '#selector_bar_container select option[selected]'):
            if selected.text:
                logging.debug(f"\tSelected: {selected.text}")
            else:
                logging.debug(f"\tSelected: {selected}")

        referenceDay = today.date()

        def resolveDay(dayNumber):
            """Return the date with day-of-month *dayNumber* nearest to today.

            The page only shows day numbers, so month and year have to be
            inferred. Candidates come from the previous, the current and the
            next month (crossing year boundaries where necessary); months in
            which the day number does not exist are ignored. Returns None if
            no month has such a day.
            """
            candidates = []
            for offset in (-1, 0, 1):
                monthIndex = (referenceDay.year * 12 + referenceDay.month -
                              1 + offset)
                try:
                    candidates.append(
                        datetime.date(monthIndex // 12, monthIndex % 12 + 1,
                                      dayNumber))
                except ValueError:
                    continue
            if not candidates:
                return None
            return min(candidates,
                       key=lambda candidate: abs(
                           (candidate - referenceDay).days))

        # Dates
        dates = []
        for day in document.select(".days_container .day"):
            try:
                dayNumber = int(day.text)
            except ValueError:
                continue

            date = resolveDay(dayNumber)
            if date is None:
                logging.warning(f"\tInvalid day number: {dayNumber}")
                continue
            dates.append(date)

        # Meals

        settimana = document.find("div", {"id": "settimana"})
        if settimana:
            for table in settimana.select("table.tabella_menu_settimanale"):

                headingTag = table.find("h5")
                if headingTag:
                    heading = headingTag.text.strip().lower()
                    if heading:
                        # A configured "pasto" restricts the feed to tables
                        # with exactly that heading.
                        if pasto and heading != pasto.lower():
                            logging.debug(
                                f"\tSkipping pasto: {heading} (!= {pasto.lower()})"
                            )
                            continue
                        else:
                            logging.debug(f"\tUsing pasto: {heading}")

                for tr in table.select("tr"):
                    header = tr.find("th")
                    if header is None:
                        # Without a header cell there is no category name
                        continue
                    category = header.text.strip()
                    for td in tr.select("td"):
                        # data-giorno is 1-based
                        day_index = int(td.attrs["data-giorno"]) - 1
                        if not 0 <= day_index < len(dates):
                            logging.warning(
                                f"\tNo date for day index {day_index}")
                            continue
                        for p in td.select("p.piatto_inline"):
                            name = p.text.replace(" *", "").replace(
                                "* ", "").replace("*", "").strip()
                            for mealText in textwrap.wrap(name, width=250):
                                lazyBuilder.addMeal(dates[day_index],
                                                    category, mealText)

        return lazyBuilder.toXMLFeed()
示例#5
0
def _parseMealsUrl(lazyBuilder, mensaId, day=None):
    """Fetch the meals page of one day and add one canteen's meals to a feed.

    Every call on *lazyBuilder* is made while holding ``lazyBuilderLock``.

    :param lazyBuilder: feed builder that receives the meals
    :param mensaId: id compared (as string) with the ``data-location``
                    attribute of the location wrappers on the page
    :param day: date to fetch; defaults to today's date from ``nowBerlin()``
    :return: True if at least one meal tile was processed, otherwise False
             (the day is marked as closed unless the canteen is unknown)
    """
    if day is None:
        day = nowBerlin().date()
    date = day.strftime("%Y-%m-%d")

    content = _getMealsURL(mealsUrl.format(date=date))
    document = BeautifulSoup(content, "html.parser")

    # .get(): a wrapper without data-location simply does not match
    mensaDivs = [
        mensaDiv for mensaDiv in document.find_all(
            "div", class_="tx-epwerkmenu-menu-location-wrapper")
        if mensaDiv.attrs.get("data-location") == str(mensaId)
    ]
    if len(mensaDivs) != 1:
        # Check if mensa is in dropdown selector
        checkbox = document.find(id=f"building-id-{mensaId}")
        if checkbox:
            logging.debug(f"No meals found [id='{mensaId}']")
            with lazyBuilderLock:
                lazyBuilder.setDayClosed(date)
        else:
            logging.error(f"Mensa not found [id='{mensaId}']")
        return False

    menuTiles = mensaDivs[0].find_all("div", class_="menue-tile")

    foundAny = False
    for menuTile in menuTiles:
        category = string.capwords(menuTile.attrs["data-category"])
        mealName = menuTile.find(
            class_="tx-epwerkmenu-menu-meal-title").text.strip()
        desc = menuTile.find(class_="tx-epwerkmenu-menu-meal-description")
        if desc and desc.text.strip():
            mealName = f"{mealName} {desc.text.strip()}"

        notes = []
        additives = menuTile.find(class_="tx-epwerkmenu-menu-meal-additives")
        if additives is not None:
            # Drop the <sup> markers so only the additive names remain
            for sup in additives.find_all('sup'):
                sup.extract()
            notes = [
                note.strip() for note in additives.text.split("\n")
                if note.strip()
            ]

        pricesDiv = menuTile.find(
            class_="tx-epwerkmenu-menu-meal-prices-values")
        priceTexts = pricesDiv.text.split('/') if pricesDiv is not None else []

        roles = []
        prices = []
        # Prices are matched to roles by position. zip() stops at the shorter
        # sequence, so surplus prices cannot raise an IndexError.
        for role, priceText in zip(rolesOrder, priceTexts):
            try:
                price = float(priceText.strip().replace(',', '.'))
            except ValueError:
                # Not a number (e.g. empty slot): no price for this role
                continue
            prices.append(price)
            roles.append(role)

        with lazyBuilderLock:
            # Long names are split into several meals; only the first part
            # carries notes, prices and roles.
            for j, mealText in enumerate(textwrap.wrap(mealName, width=250)):
                lazyBuilder.addMeal(date, category, mealText,
                                    notes if j == 0 else None,
                                    prices if j == 0 else None,
                                    roles if j == 0 else None)
        foundAny = True

    if foundAny:
        return True

    with lazyBuilderLock:
        lazyBuilder.setDayClosed(date)

    return False
示例#6
0
def getMenu(mensaId):
    """
    Create openmensa feed from mensen.at website

    :param mensaId: id passed on to ``askMensenAt``
    :return: the result of the builder's ``toXMLFeed()``, or an error message
             string if the response status code is not 200
    """
    lazyBuilder = StyledLazyBuilder()

    today = nowBerlin().date()
    year = today.year
    month = today.month

    r = askMensenAt(mensaId=mensaId)
    if r.status_code != 200:
        status = 'Could not open mensen.at'
        if 'status' in r.headers:
            status = f"{status}: {r.headers['status']}"
        logging.error(status)
        from pprint import pprint
        pprint(r.headers)
        return status

    document = BeautifulSoup(r.text, "html.parser")

    def extractLine(line, data):
        """Move price and additive codes from *line* into *data*.

        The cleaned-up remainder of the line is appended to ``data['text']``.
        """
        def price(m):
            data['price'] = m[1].replace(',', '.')
            return ""

        def addi(m):
            # m.group(0) is e.g. "(A, G)"; strip the parentheses, then split
            data['additives'] += [
                x.strip() for x in m.group(0)[1:-1].split(',') if x.strip()
            ]
            return ""

        line = re.sub(r"€\s+(\d+[,\.]\d+)", price, line)
        line = re.sub(r"\(([A-Z],?\s*)+\)", addi, line)
        line = re.sub(r"\s+", " ", line).strip().replace(' ,', ',')
        data['text'].append(line)

    # Map the index of each weekday tab to its date. Only day and month are
    # given; a month before the current one is taken to be in the next year.
    dates = {}
    for navItem in document.select('.weekdays .nav-item[data-index]'):
        index = int(navItem.attrs['data-index'])
        date = navItem.find('span', class_="date").text.split('.')
        dates[index] = datetime.date(
            year + 1 if int(date[1]) < month else year, int(date[1]),
            int(date[0]))

    # index -> category -> product names already added (duplicate filter)
    mealDict = {}

    for menuItem in document.select(".menu-item[class*='menu-item-']"):
        index = int([
            className.split('menu-item-')[1]
            for className in menuItem.attrs['class']
            if 'menu-item-' in className
        ][0])
        category = menuItem.h2.text

        if index not in mealDict:
            mealDict[index] = {}

        paragraphs = menuItem.find_all('p')

        # One list of image descriptions per paragraph
        imgs = []
        for p in paragraphs:
            imageList = []
            imgs.append(imageList)

            for img in p.find_all('img'):
                if 'alt' in img.attrs:
                    imageList.append(img.attrs['alt'])
                    continue
                # No alt text: identify the image by its source
                imgSrc = img.attrs.get('src', '')
                for src, alt in imageLegend.items():
                    if src in imgSrc:
                        imageList.append(alt)
                        break
                else:
                    logging.warning("Unkown image found: %r", img)

        lines = [p.text.strip() for p in paragraphs]
        # Sentinel that flushes whatever is still pending after the last line
        lines.append('#end')
        imgs.append([])

        data = {'additives': [], 'text': []}
        for line, lineImages in zip(lines, imgs):
            data['additives'] += lineImages

            addMeal = False
            if line == '#end':
                addMeal = True
            elif all(c == '*' for c in line):  # *********
                addMeal = True
            else:
                extractLine(line, data)
            if 'price' in data:
                addMeal = True

            # Lines starting with '*' never finish a meal. Together with the
            # branch above this leaves only the empty line (all() of nothing
            # is True) as a separator.
            if line.startswith('*') and addMeal:
                addMeal = False

            if addMeal and data['text']:
                data['additives'] = [
                    legend.get(add, add) for add in data['additives'] if add
                ]
                # dict.fromkeys removes duplicates but keeps the order
                notes = list(
                    dict.fromkeys([note[0:249] for note in data['additives']]))
                for j, productName in enumerate(
                        textwrap.wrap(" ".join(data['text']).strip(),
                                      width=250)):
                    known = mealDict[index].setdefault(category, [])
                    if productName not in known:
                        known.append(productName)
                        # Only the first part of a wrapped name carries
                        # notes, price and roles.
                        lazyBuilder.addMeal(
                            dates[index], category, productName,
                            notes if j == 0 else None, (data['price'], )
                            if 'price' in data and j == 0 else None,
                            roles if 'price' in data and j == 0 else None)
                data = {'additives': [], 'text': []}

    return lazyBuilder.toXMLFeed()
示例#7
0
文件: tools.py 项目: cvzi/mensa
def getMenu(restaurantId,
            datetimeDay=None,
            serviceIds=None,
            alternativeId=None,
            alternativeServiceIds=None):
    """
    Create openmensa feed from restopolis website

    :param restaurantId: id passed on to ``askRestopolis``
    :param datetimeDay: date to request; defaults to today (``nowBerlin()``)
    :param serviceIds: a single service id, or an iterable whose items are
                       service ids or ``(id, name)`` pairs. The argument
                       itself is never modified.
    :param alternativeId: restaurant id that is tried if no meal was found
    :param alternativeServiceIds: ``serviceIds`` for the alternative id
    :return: tuple ``(xml, number of days, number of meals, weekdayCounter)``;
             if a request does not answer with status 200 the tuple is
             ``(error message, 0, 0, weekdayCounter)``
    """
    startTime = time.time()
    lazyBuilder = StyledLazyBuilder()
    comments = []

    if not datetimeDay:
        datetimeDay = nowBerlin().date()

    # Normalise into a private list of (id, name) pairs. The list is extended
    # further down, which must not leak into the caller's object, and item
    # assignment on a caller-supplied tuple would fail.
    if isinstance(serviceIds, str) or not isinstance(serviceIds, Iterable):
        serviceIds = [
            (serviceIds, ""),
        ]
    else:
        serviceIds = [(service, "") if isinstance(service,
                                                  (str, int)) else service
                      for service in serviceIds]

    mealCounter = 0
    dayCounter = set()
    weekdayCounter = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0}
    # With a single service the following weeks may be requested as well
    repeat = len(serviceIds) == 1
    repeatCounter = 0
    mealCounterLast = mealCounter
    # NOTE: serviceIds can grow while this loop runs (see end of loop body)
    for service in serviceIds:
        serviceSuffix = f"({service[1]})" if service[1] and len(
            serviceIds) > 1 else ""
        r = askRestopolis(restaurant=restaurantId,
                          service=service[0],
                          date=datetimeDay)
        if r.status_code != 200:
            status = f'Could not open restopolis Error{r.status_code}'
            if 'status' in r.headers:
                status = f"{status}: {r.headers['status']}"
            logging.error(status)
            from pprint import pprint
            pprint(r.headers)
            return status, 0, 0, weekdayCounter

        if '<' not in r.text:
            comments.append(
                f"Restaurant [id={restaurantId}, service={service}]: No HTML in response body: `{r.text}`"
            )
            break

        document = BeautifulSoup(r.text, "html.parser")

        # Extract available dates from date selector
        # (three different page layouts are tried in turn)
        dateSelector = document.find("div", {"class": "date-selector-desktop"})

        if dateSelector:
            dateButtons = dateSelector.find_all("button", {"class": "day"})
            dates = []
            for button in dateButtons:
                dates.append(
                    datetime.datetime.strptime(button.attrs["data-full-date"],
                                               '%d.%m.%Y').date())
        else:
            dateSelector = document.find(
                "div", {"class": "date-selector-mobile-indicator"})
            if dateSelector:
                dateButtons = dateSelector.find_all(
                    "div", {"class": "date-selector-mobile-day-bullet"})
                dates = []
                for button in dateButtons:
                    dates.append(
                        datetime.datetime.strptime(
                            button.attrs["data-day-text"], '%d.%m.%Y').date())

            else:
                dateSelector = document.find("div", {"id": "date-selector"})
                if dateSelector:
                    dateButtons = dateSelector.find_all("a", {"class": "day"})
                    dates = []
                    for button in dateButtons:
                        dates.append(
                            datetime.datetime.strptime(
                                button.attrs["data-date"], '%d.%m.%Y').date())

                else:
                    logging.warning("No div.date-selector-desktop found")
                    comments.append(
                        f"Restaurant [id={restaurantId}, service={service}] not found"
                    )
                    break

        # Extract menu for each date
        for i, oneDayDiv in enumerate(document.select(".daily-menu>div")):
            dayCounter.add(dates[i])
            date = dates[i]
            weekDay = date.weekday()
            courseName = ""
            categoryNotes = []
            notes = []
            productSection = ""
            productName = ""
            productAllergens = []
            productDescription = ""
            isClosed = False

            # A meal is only added once the *next* element is reached, so an
            # artificial last element flushes the final meal of the day.
            oneDayDiv.append(
                document.new_tag("div", attrs={"class": "fake-last"}))
            # This list grows while it is iterated: the content of category
            # containers is appended further down.
            children = list(oneDayDiv.children)
            for div in children:
                if not isinstance(div, bs4.element.Tag):
                    # Skip text node children
                    continue
                if not isClosed and courseName and productName and "class" in div.attrs and (
                        "fake-last" in div.attrs["class"]
                        or "product-name" in div.attrs["class"]
                        or "course-name" in div.attrs["class"]
                        or "product-section" in div.attrs["class"]):
                    # Add meal
                    mealCounter += 1
                    weekdayCounter[weekDay] += 1
                    category = courseName
                    if productSection:
                        category += " " + productSection
                    if serviceSuffix:
                        category += " " + serviceSuffix
                    if productDescription:
                        notes += textwrap.wrap(productDescription, width=250)
                    if productAllergens:
                        notes += productAllergens
                    if categoryNotes:
                        notes += categoryNotes
                    lazyBuilder.addMeal(date, category, productName[0:249],
                                        [note[0:249] for note in notes])
                    productName = ""
                    productAllergens = []
                    productDescription = ""
                    notes = []

                # walk through all div and collect info
                if "class" in div.attrs:
                    if "fake-last" in div.attrs["class"]:
                        pass
                    # NOTE(review): find() expects a tag name, not a CSS
                    # selector, so the second operand probably never matches
                    # (select_one() may have been intended) - confirm before
                    # changing, it would mark more days as closed.
                    elif "no-products" in div.attrs["class"] or div.find(
                            ".formulaeContainer.no-products"):
                        # Closed (No meals)
                        lazyBuilder.setDayClosed(date)
                        isClosed = True
                    elif "fermé" in div.text.lower() or "fermé" in str(
                            div.attrs).lower():
                        # Closed (explicit)
                        lazyBuilder.setDayClosed(date)
                        isClosed = True
                    elif "course-name" in div.attrs["class"]:
                        courseName = div.text.strip()
                        productSection = ""
                    elif "product-section" in div.attrs["class"]:
                        productSection = div.text.strip()
                    elif "product-allergens" in div.attrs["class"]:
                        productAllergensGen = (a.strip()
                                               for a in div.text.split(",")
                                               if a.strip())
                        # Every number is replaced by the result of allergen()
                        productAllergens += [
                            re.sub(r"\d+", lambda m: allergen(m[0]), a)
                            for a in productAllergensGen
                        ]
                    elif "product-description" in div.attrs["class"]:
                        productDescription = div.text.strip()
                    elif "product-name" in div.attrs["class"]:
                        productName = div.text.strip()
                        productName = productName.replace("''", '"')
                        productName = productName.replace("1/2 ", '½ ')
                    elif "product-flag" in div.attrs["class"]:
                        unknownImg = True
                        for img in imgs:
                            if div.attrs["src"].endswith(img):
                                notes.append(imgs[img])
                                unknownImg = False
                        if unknownImg:
                            logging.warning(
                                f"Unkown img {div.attrs['src']} [restaurant={restaurantId}]"
                            )
                            comments.append(
                                f"Unkown img {div.attrs['src']} [restaurant={restaurantId}]"
                            )
                    elif "wrapper-theme-day" in div.attrs["class"]:
                        logging.info(
                            f"Theme day: {div.text.strip()} [restaurant={restaurantId}]"
                        )
                        comments.append(
                            f"Theme day: {div.text.strip()} [restaurant={restaurantId}]"
                        )
                    elif "wrapper-category" in div.attrs["class"]:
                        for categoryButton in div.find_all('button'):
                            if "showConstantProducts" not in categoryButton.attrs[
                                    'class'] and "showFormulae" not in categoryButton.attrs[
                                        'class']:
                                logging.info(
                                    f"Unknown category button: {categoryButton.attrs['class']}: {categoryButton.text.strip()}"
                                )
                                comments.append(
                                    f"Unknown category button: {categoryButton.attrs['class']}: {categoryButton.text.strip()}"
                                )
                    elif "cb" in div.attrs["class"]:
                        pass
                    elif "formulaeContainer" in div.attrs[
                            "class"] or "constantProductContainer" in div.attrs[
                                "class"]:
                        # Append content of category container
                        # (re-appending the popped element keeps the former
                        # last entry at the very end)
                        last = children.pop()
                        children.extend(div.children)
                        children.append(last)

                        if "constantProductContainer" in div.attrs["class"]:
                            categoryNotes = ["produit constant"]
                        else:
                            categoryNotes = []
                    elif "action-buttons" in div.attrs["class"]:
                        pass
                    else:
                        logging.debug(div)
                        raise RuntimeWarning(
                            f"unknown tag <{div.name}> with class {div.attrs['class']}: oneDayDiv->else [restaurant={restaurantId}]"
                        )
                elif div.name == 'ul':
                    # Counted once per list, not once per list item
                    mealCounter += 1
                    weekdayCounter[weekDay] += 1
                    for li in div.select('li'):
                        if not li.text or not li.text.strip():
                            continue
                        # Add meal
                        category = courseName
                        if productSection:
                            category += " " + productSection
                        lazyBuilder.addMeal(date, category,
                                            li.text.strip()[0:249])
                        productName = ""
                        productAllergens = []
                        productDescription = ""
                else:
                    logging.debug(div)
                    raise RuntimeWarning(
                        f"unknown tag <{div.name}>: oneDayDiv->else")

        # Queue the same service again for the following week (at most three
        # times) as long as new meals keep coming in, or on weekends.
        # NOTE(review): `duration` is presumably set by askRestopolis, and
        # time.time() counts seconds, so 7000 is almost two hours - confirm
        # the intended units of both limits.
        if hasattr(r, 'duration'
                   ) and r.duration < 2000 and time.time() - startTime < 7000:
            if repeat and repeatCounter < 3 and (
                    mealCounter > 0 and mealCounter > mealCounterLast
                    or nowBerlin().weekday() in (5, 6)):
                repeatCounter += 1
                mealCounterLast = mealCounter
                serviceIds.append(service)
                datetimeDay += datetime.timedelta(days=7)

    if mealCounter == 0 and alternativeId:
        logging.debug("No meals -> trying alternativeId")
        return getMenu(alternativeId,
                       datetimeDay=datetimeDay,
                       serviceIds=alternativeServiceIds,
                       alternativeId=None,
                       alternativeServiceIds=None)

    xml = lazyBuilder.toXMLFeed()
    # Diagnostics are appended as XML comments; "--" is not allowed in them
    for commentStr in comments:
        xml += f"\n<!-- {commentStr.replace('--', '- -')} -->\n"

    print(f" {len(dayCounter):3d} 📅 {mealCounter:4d} 🍽️ ", end="")
    return xml, len(dayCounter), mealCounter, weekdayCounter