Exemplo n.º 1
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Fetch the DigitalPour tap list page and scrape the beer list.

        Keyword Args:
            brewery: key into ``BREWERY_INFO``; items 0 and 1 of the entry
                fill the ``companyID`` and ``locationID`` URL parameters.
            Every keyword argument is also forwarded to
            ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed again), False after a freshly read page was scraped.

        Raises:
            KeyError: if ``brewery`` is missing from ``kwargs`` or from
                ``BREWERY_INFO``.
            AssertionError: if there is no response text, or it contains no
                closing ``</body>`` tag.
        """
        brewery = kwargs['brewery']

        # construct our URL
        loc_theme = BREWERY_INFO[brewery]
        url = "http://fbpage.digitalpour.com/?companyID={0}&locationID={1}" \
            .format(loc_theme[0], loc_theme[1])
        BreweryPage.fetch_taplist(self, url=url, **kwargs)
        is_cached = self.read_page(brewery)  # read the page
        if is_cached:
            return True

        assert self._cached_response is not None
        end_string = '</body>'
        start_pos = self._cached_response.find('<body>')
        end_pos = self._cached_response.find(end_string)
        # str.find signals "not found" with -1. Compare by value (an identity
        # test against an int literal is implementation-dependent) and do it
        # before end_pos is used to build the slice.
        assert end_pos != -1
        html_menu = self._cached_response[start_pos:end_pos + len(end_string)]
        self._soup = bs.BeautifulSoup(html_menu, "html.parser")
        assert self._soup is not None

        beer_div_list = self._soup.find_all("div", {"class": "beverageInfo"})
        self.add_beers(beer_div_list)

        return False  # not from cache
Exemplo n.º 2
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Jersey Girl alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = [
            "jerseygirl",
            "Jersey Girl Brewing",
            "Jersey Girl Brewery",
        ]
        self._alias = {"Jersey Girl": alternate_names}
Exemplo n.º 3
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Village Idiot aliases.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = ["Village Idiot Brewery", "Village Idiot Brewing"]
        self._alias = {"Village Idiot": alternate_names}
Exemplo n.º 4
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Rinn Duin alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = [
            "Rain doing",
            "Rinn Duin Brewing",
            "Rinn Duin Brewery",
            "ring doing",
        ]
        self._alias = {"Rinn Duin": alternate_names}
Exemplo n.º 5
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Trap Rock alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = [
            "Trap Rock Brewing",
            "Trap Rock Brewery",
            "track rock",
        ]
        self._alias = {"Trap Rock": alternate_names}
Exemplo n.º 6
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Two Ton alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = [
            "Two Ton Brewing",
            "Two Ton Brewery",
            "2 ton",
            "2 ton brewing",
            "2 ton brewery",
        ]
        self._alias = {"Two Ton": alternate_names}
Exemplo n.º 7
0
 def test_read(self):
     """Check that the base class reads a brewery page via a mocked session.

     A ``requests_mock`` adapter serves a minimal HTML page for the mock URL;
     ``read_page`` is expected to return exactly ``False``.
     """
     page_url = 'mock://brewery.com'
     page_html = ('<html><body><div class="field-item"> '
                  '<div class="beer-holder"></div></div></body></html>')
     page = BreweryPage(url=page_url)

     # mount the mock adapter on the 'mock' prefix of a fresh session
     session = requests.Session()
     mock_adapter = requests_mock.Adapter()
     session.mount('mock', mock_adapter)
     mock_adapter.register_uri('GET', page_url, text=page_html)

     was_cached = page.read_page(brewery="mockbrewery", in_session=session)
     assert was_cached is False
Exemplo n.º 8
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Departed Soles aliases.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = [
            "Departed Soles Brewing",
            "Departed Soles Brewery",
            "departed sole",
            "departed sole brewing",
            "departed sole brewery",
            "soul",
        ]
        self._alias = {"Departed Soles": alternate_names}
Exemplo n.º 9
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Angry Erik alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        # (the canonical name itself is deliberately kept in the list)
        alternate_names = [
            "Angry Erik Brewing",
            "Angry Erik Brewery",
            "Angry Eric",
            "Angry Erik",
            "Angry or",
            "Angry Eric Brewing",
            "Angry Eric Brewery",
        ]
        self._alias = {"Angry Erik": alternate_names}
Exemplo n.º 10
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Twin Elephant aliases.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = [
            "TEB",
            "Twin Elephant Brewing",
            "Twin Elephant Brewery",
            "twelfth",
            "12 and",
            "12 fit",
            "20 elephant",
        ]
        self._alias = {"Twin Elephant": alternate_names}
Exemplo n.º 11
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Read the Two Ton beers page and parse every content block on it.

        Keyword arguments are forwarded to ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        BreweryPage.fetch_taplist(self, url="http://www.twotonbrewing.com/beers/", **kwargs)
        assert self._url is not None

        # the first alias key is the brewery name handed to read_page
        brewery_name = list(self._alias)[0]
        if self.read_page(brewery=brewery_name):
            return True

        for content_block in self._soup.find_all("div", {"class": "sqs-block-content"}):
            self.parse_beer(content_block)

        return False  # not from cache
Exemplo n.º 12
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Read the Cypress "currently available" page and parse each entry.

        Keyword arguments are forwarded to ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        page_url = "http://cypressbrewing.com/beer_type/currently-available/"
        BreweryPage.fetch_taplist(self, url=page_url, **kwargs)
        assert self._url is not None

        # the first alias key is the brewery name handed to read_page
        brewery_name = list(self._alias)[0]
        if self.read_page(brewery=brewery_name):
            return True

        for menu_entry in self._soup.find_all("div", {"class": "menu-content-pro"}):
            self.parse_beer(menu_entry)

        return False  # not from cache
Exemplo n.º 13
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Read the Trap Rock restaurant menu page and parse its item names.

        Menu items whose text contains '32oz' are skipped. Keyword arguments
        are forwarded to ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        page_url = "http://www.traprockrestaurant.net/menus/8449"
        BreweryPage.fetch_taplist(self, url=page_url, **kwargs)
        assert self._url is not None

        # the first alias key is the brewery name handed to read_page
        brewery_name = list(self._alias)[0]
        if self.read_page(brewery=brewery_name):
            return True

        for item_name in self._soup.find_all("div", {"class": "menu-item-name"}):
            if '32oz' not in item_name.text:
                self.parse_beer(item_name)

        return False  # not from cache
Exemplo n.º 14
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Fetch the Untappd theme script and scrape the tap list out of it.

        The menu HTML sits inside a ``container.innerHTML = "..."`` assignment
        in the fetched script; it is cut out, un-escaped and parsed.

        Keyword Args:
            brewery: key into ``BREWERY_INFO``; items 0 and 1 of the entry
                fill the location and theme parts of the URL.
            Every keyword argument is also forwarded to
            ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        brewery = kwargs.get('brewery')
        assert brewery is not None

        # build the URL from the brewery's location and theme identifiers
        info = BREWERY_INFO[brewery]
        url = "https://business.untappd.com/locations/{0}/themes/{1}/js".format(
            info[0], info[1])

        # let the base class do its pre-fetch initialization
        BreweryPage.fetch_taplist(self, url=url, **kwargs)
        assert self._url is not None
        if self.read_page(brewery):  # read the page
            return True

        response = self._cached_response
        assert response is not None

        # the menu runs from just after the marker to the last double quote
        # that precedes the '(function (){' text
        marker = 'container.innerHTML = "'
        menu_start = response.find(marker) + len(marker)
        script_pos = response.find('(function (){')
        menu_end = response.rfind('"', 0, script_pos)
        html_menu = (response[menu_start:menu_end]
                     .replace('\\"', '"')
                     .replace('/\n', '\n')
                     .replace('\\/', '/'))

        self._soup = bs.BeautifulSoup(html_menu, "html.parser")
        assert self._soup is not None
        beer_div_list = self._soup.find_all("div", {"class": "beer"})
        assert beer_div_list is not None
        if not beer_div_list:
            # no "beer" divs found: hand the raw menu to the alternate parser
            self.untied_parser(html_menu)
            return False  # not from the cache

        # only beers listed under the first section heading are parsed
        section_headers = self._soup.find_all("div", {"class": "section-name"})
        first_section = section_headers[0].text
        for beer in beer_div_list:
            owning_section = beer.find_previous("div", "section-name")
            if owning_section.text != first_section:
                break
            assert beer is not None
            self.parse_inner_content(beer)

        return False  # not from the cache
Exemplo n.º 15
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Fetch the taplist for TEB by scraping their site directly.

        Each ``div.beer-holder`` becomes one ``Beer``. Name and style come
        from the direct children classed "beer-name" / "beer-style"; ABV and
        hops come from "pure-u-1" children of a "pure-g" child whose text
        contains 'ABV:' or 'HOPS:'. Fields that are not found stay ``None``.

        Keyword arguments are forwarded to ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """

        def css_classes(node):
            """Return node's class list; empty for text nodes and class-less tags."""
            return (getattr(node, 'attrs', None) or {}).get('class', ())

        # perform any pre-fetch initialization of base class
        BreweryPage.fetch_taplist(self, url="http://www.twinelephant.com", **kwargs)
        assert self._url is not None
        is_cached = self.read_page(brewery=list(self._alias.keys())[0])  # read the page
        if is_cached:
            return True

        beer_div_list = self._soup.find_all("div", {"class": "beer-holder"})
        for beer in beer_div_list:
            name = None
            style = None
            abv = None
            hops = None
            for content in beer.contents:
                classes = css_classes(content)
                if 'beer-name' in classes:
                    name = content.text
                if 'beer-style' in classes:
                    style = content.text
                if 'pure-g' in classes:   # ABV start
                    for inner_c in content.contents:
                        # A child tag without a class attribute used to raise
                        # KeyError here; css_classes() yields an empty result.
                        if 'pure-u-1' not in css_classes(inner_c):
                            continue
                        inner_text = inner_c.text
                        # our string is "\nABV: 6.2%\n",
                        # we just want the '6.2%' portion
                        if 'ABV:' in inner_text:
                            abv = inner_text.split('ABV:')[1].strip()
                        elif 'HOPS:' in inner_text:
                            hops = inner_text.split('HOPS:')[1].strip()

            # now add the beer to the list
            self.add_beer(Beer(name=name, style=style, abv=abv, hops=hops))

        # we now have a list of beers for this brewery
        return False  # not from cache
Exemplo n.º 16
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Fetch the taplist for Jersey Girl by scraping their beers page.

        The spans are scanned for the 'On Tap in the Sample Room' heading.
        After it, the first span whose first child has more than two children
        supplies the node list passed to ``find_beers``.

        Keyword Args:
            mockedlist: optional ready-made span list (testing injection);
                when given, the page is not read.
            Every keyword argument is also forwarded to
            ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached, False in
            every other case (including when the heading is not found).
        """
        span_list = kwargs.get('mockedlist', None)  # testing injection
        # perform any pre-fetch initialization of base class
        BreweryPage.fetch_taplist(
            self, url="http://www.jerseygirlbrewing.com/beers.html", **kwargs)

        if span_list is None:
            brewery_name = list(self._alias)[0]
            if self.read_page(brewery=brewery_name):  # read the page
                return True
            span_list = self._soup.find_all("span")

        # locate the span that opens the tap list
        heading_idx = None
        for position, span in enumerate(span_list):
            if 'On Tap in the Sample Room' in span.text:
                heading_idx = position
                break
        if heading_idx is None:
            return False

        # okay we found the Tap List span, now look for beers
        for position in range(heading_idx + 1, len(span_list)):
            children = getattr(span_list[position], 'contents', None)
            if not isinstance(children, list) or not children:
                continue
            grandchildren = getattr(children[0], 'contents', None)
            if isinstance(grandchildren, list) and len(grandchildren) > 2:
                self.find_beers(grandchildren)
                break  # only the first matching span is used

        return False  # not from cache
Exemplo n.º 17
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Fetch the BeerMenus widget script and scrape the on-tap table.

        The menu HTML sits inside a ``widgetDiv.innerHTML = '...'`` assignment
        in the fetched script; it is cut out, un-escaped and parsed. Each row
        of the "on_tap-section" table with more than three child nodes
        becomes one ``Beer``: the name is the second line of the first
        column's stripped text, the ABV is the second column plus '%'.

        Keyword Args:
            brewery: key into ``BREWERY_INFO``; item 0 of the entry is the
                widget id used in the URL.
            Every keyword argument is also forwarded to
            ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.

        Raises:
            KeyError: if ``brewery`` is missing from ``kwargs`` or from
                ``BREWERY_INFO``.
            AssertionError: if there is no response text, or the closing
                ``;\\n}`` of the script is not found.
        """
        brewery = kwargs['brewery']

        # construct our URL
        url = "https://beermenus.com/menu_widgets/{0}".format(
            BREWERY_INFO[brewery][0])
        BreweryPage.fetch_taplist(self, url=url, **kwargs)
        is_cached = self.read_page(brewery=brewery)  # read the page
        if is_cached:
            return True

        assert self._cached_response is not None
        start_string = 'widgetDiv.innerHTML = \'\\n'
        start_pos = self._cached_response.find(start_string)
        end_pos = self._cached_response.rfind(';\n}')
        # str.rfind signals "not found" with -1. Compare by value (an identity
        # test against an int literal is implementation-dependent) and do it
        # before end_pos is used to build the slice.
        assert end_pos != -1
        html_menu = self._cached_response[start_pos +
                                          len(start_string):end_pos - 1]
        # undo the JavaScript string escaping
        html_menu = html_menu.replace('\\"', '"')
        html_menu = html_menu.replace('\\n', '\n')
        html_menu = html_menu.replace('\\/', '/')
        self._soup = bs.BeautifulSoup(html_menu, "html.parser")
        assert self._soup is not None
        taplist_table = self._soup.find('table',
                                        attrs={'class': 'on_tap-section'})
        table_body = taplist_table.find('tbody')
        rows = table_body.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            if len(row.contents) > 3:
                self.add_beer(
                    Beer(name=cols[0].text.strip().split('\n')[1],
                         style=None,
                         abv=cols[1].text.strip() + '%',
                         ibu=None,
                         desc=None))
        return False  # not from cache
Exemplo n.º 18
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install aliases for five breweries.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by;
        # key order is kept exactly as originally written
        alias_table = {
            "Fort Nonsense": [
                "Fort Nonsense Brewing",
                "Fort Nonsense Brewery",
                "fortnite incense",
            ],
            "Alementary": [
                "Alementary Brewing",
                "Alementary Brewery",
                "elementary",
                "elementary brewing",
                "elementary brewery",
            ],
            "Man Skirt": [
                "Man Skirt Brewing",
                "Man Skirt Brewery",
                "man's skirt",
                "mansker",
            ],
            "Pinelands": [
                "Pinelands Brewing",
                "Pinelands Brewery",
            ],
            "Untied": [
                "Untied Brewing",
                "Untied Brewery",
                "United",
                "United Brewing",
                "United Brewery",
                "Untied Browing",
                "Untied Brewing Company",
            ],
        }
        self._alias = alias_table
Exemplo n.º 19
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Read the Departed Soles beer page and build the beer list from it.

        Each ``div.beersamples`` whose second child node is an ``h4`` yields
        one ``Beer``. The name is the ``h4`` text; style and ABV are the
        first and second bullet-separated (U+2022) parts of the fourth child
        node's text. Keyword arguments are forwarded to
        ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        BreweryPage.fetch_taplist(self, url="http://www.departedsoles.com/beer.html", **kwargs)
        assert self._url is not None

        # the first alias key is the brewery name handed to read_page
        brewery_name = list(self._alias)[0]
        if self.read_page(brewery=brewery_name):
            return True

        for sample in self._soup.find_all("div", {"class": "beersamples"}):
            heading = sample.contents[1]
            if heading.name != 'h4':
                continue
            beer_name = heading.text
            # e.g. "<style> \u2022 <abv>": split once, use both halves
            details = sample.contents[3].text.split(u'\u2022')
            beer_style = details[0].strip()
            beer_abv = details[1].strip()
            # now add the beer to the list
            self.add_beer(Beer(name=beer_name, style=beer_style, abv=beer_abv, hops=None))

        # we now have a list of beers for this brewery
        assert self._beer_list is not None
        return False  # not from cache
Exemplo n.º 20
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Read the Kane Brewing home page and collect its beers.

        Only the "On Premise" beers are meant to be shown, not the permanent
        ones: scanning stops at the second "panel-two-column" tag. A product
        tag is added only when it has more than five child nodes, the sixth
        being its description. Keyword arguments are forwarded to
        ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        BreweryPage.fetch_taplist(self, url="http://www.kanebrewing.com", **kwargs)
        assert self._url is not None

        # the first alias key is the brewery name handed to read_page
        brewery_name = list(self._alias)[0]
        if self.read_page(brewery=brewery_name):
            return True

        # KanePage.filter picks the tags of interest
        panels_seen = 0
        for tag in self._soup.find_all(KanePage.filter):
            if "panel-two-column" in tag['class']:
                panels_seen += 1
                if panels_seen > 1:
                    break
                continue

            beer_name = tag.contents[1].text.strip("\n ")
            beer_style = KanePage.parse_beer_style(tag)
            beer_abv = KanePage.parse_beer_abv(tag)
            if len(tag.contents) <= 5:
                continue  # no description node, so the beer is not listed
            beer_desc = tag.contents[5].text.strip("\n ")
            new_beer = Beer(name=beer_name,
                            style=beer_style,
                            abv=beer_abv,
                            desc=beer_desc,
                            hops=None)
            self.add_beer(new_beer)

        return False  # not from cache
Exemplo n.º 21
0
    def fetch_taplist(self, **kwargs) -> bool:
        """Read the Angry Erik services page and parse the underlined names.

        Spans are scanned in order. A span whose first child is a ``<u>``
        element with non-empty text is passed to ``parse_beer_text``; the
        first span containing '**' ends the scan. Keyword arguments are
        forwarded to ``BreweryPage.fetch_taplist``.

        Returns:
            True when ``read_page`` reports the page as cached (nothing is
            parsed), False after a freshly read page was parsed.
        """
        page_url = "http://www.angryerik.com/services.html"
        BreweryPage.fetch_taplist(self, url=page_url, **kwargs)
        assert self._url is not None

        # the first alias key is the brewery name handed to read_page
        brewery_name = list(self._alias)[0]
        if self.read_page(brewery=brewery_name):
            return True

        for span in self._soup.find_all("span"):
            if '**' in span.text:
                break  # nothing after this marker is scanned
            children = span.contents
            if not children:
                continue
            first_child = children[0]
            if getattr(first_child, 'name', None) != 'u':
                continue
            underlined_text = first_child.text
            if underlined_text:
                self.parse_beer_text(underlined_text)

        return False  # not from cache
Exemplo n.º 22
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Cypress alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = ["Cypress Brewing", "Cypress Brewery"]
        self._alias = {"Cypress": alternate_names}
Exemplo n.º 23
0
    def __init__(self, **kwargs):
        """Initialize the base page, then install the Kane alias table.

        All keyword arguments are forwarded to ``BreweryPage.__init__``.
        """
        BreweryPage.__init__(self, **kwargs)

        # canonical brewery name -> alternate names it is also known by
        alternate_names = ["Kane Brewing", "Kane Brewery"]
        self._alias = {"Kane": alternate_names}