def fetch_taplist(self, **kwargs) -> bool:
    """Fetch a DigitalPour tap list page and scrape out the beer list.

    Args:
        **kwargs: must contain ``brewery``, which is used to index
            ``BREWERY_INFO``; every keyword argument is also forwarded to
            ``BreweryPage.fetch_taplist``.

    Returns:
        True when ``read_page`` reports a cached page (nothing is parsed),
        False after the freshly read page has been scraped.

    Raises:
        KeyError: ``brewery`` is missing from ``kwargs``.
        AssertionError: there is no cached response, or the response has no
            closing ``</body>`` tag.
    """
    brewery = kwargs['brewery']

    # construct our URL from the first two BREWERY_INFO fields, which fill
    # the companyID and locationID query parameters
    loc_theme = BREWERY_INFO[brewery]
    url = "http://fbpage.digitalpour.com/?companyID={0}&locationID={1}" \
        .format(loc_theme[0], loc_theme[1])
    BreweryPage.fetch_taplist(self, url=url, **kwargs)

    is_cached = self.read_page(brewery)  # read the page
    if is_cached:
        return True

    assert self._cached_response is not None
    start_pos = self._cached_response.find('<body>')
    end_string = '</body>'
    end_pos = self._cached_response.find(end_string)
    # Compare by value, and do it before end_pos is used: `is not -1` tests
    # object identity and only works through CPython's small-int cache.
    assert end_pos != -1
    html_menu = self._cached_response[start_pos:end_pos + len(end_string)]

    self._soup = bs.BeautifulSoup(html_menu, "html.parser")
    assert self._soup is not None
    beer_div_list = self._soup.find_all("div", {"class": "beverageInfo"})
    self.add_beers(beer_div_list)
    return False  # not from cache
def __init__(self, **kwargs):
    """Initialise the base page, then install the Jersey Girl alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "jerseygirl",
        "Jersey Girl Brewing",
        "Jersey Girl Brewery",
    ]
    self._alias = {"Jersey Girl": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Village Idiot alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Village Idiot Brewery",
        "Village Idiot Brewing",
    ]
    self._alias = {"Village Idiot": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Rinn Duin alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Rain doing",
        "Rinn Duin Brewing",
        "Rinn Duin Brewery",
        "ring doing",
    ]
    self._alias = {"Rinn Duin": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Trap Rock alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Trap Rock Brewing",
        "Trap Rock Brewery",
        "track rock",
    ]
    self._alias = {"Trap Rock": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Two Ton alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Two Ton Brewing",
        "Two Ton Brewery",
        "2 ton",
        "2 ton brewing",
        "2 ton brewery",
    ]
    self._alias = {"Two Ton": alternate_names}
def test_read(self):
    """Check that a base BreweryPage can read a page through a mocked session."""
    mock_url = 'mock://brewery.com'
    page_html = ('<html><body><div class="field-item"> '
                 '<div class="beer-holder"></div></div></body></html>')

    # serve the canned HTML for GET requests to the mock:// URL
    adapter = requests_mock.Adapter()
    adapter.register_uri('GET', mock_url, text=page_html)
    session = requests.Session()
    session.mount('mock', adapter)

    page = BreweryPage(url=mock_url)
    from_cache = page.read_page(brewery="mockbrewery", in_session=session)
    assert from_cache is False
def __init__(self, **kwargs):
    """Initialise the base page, then install the Departed Soles alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Departed Soles Brewing",
        "Departed Soles Brewery",
        "departed sole",
        "departed sole brewing",
        "departed sole brewery",
        "soul",
    ]
    self._alias = {"Departed Soles": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Angry Erik alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Angry Erik Brewing",
        "Angry Erik Brewery",
        "Angry Eric",
        "Angry Erik",
        "Angry or",
        "Angry Eric Brewing",
        "Angry Eric Brewery",
    ]
    self._alias = {"Angry Erik": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Twin Elephant alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "TEB",
        "Twin Elephant Brewing",
        "Twin Elephant Brewery",
        "twelfth",
        "12 and",
        "12 fit",
        "20 elephant",
    ]
    self._alias = {"Twin Elephant": alternate_names}
def fetch_taplist(self, **kwargs) -> bool:
    """Read the Two Ton beers page and hand each content block to parse_beer.

    Returns True when read_page reports a cached page (nothing is parsed),
    otherwise False once every ``sqs-block-content`` div has been parsed.
    """
    BreweryPage.fetch_taplist(self,
                              url="http://www.twotonbrewing.com/beers/",
                              **kwargs)
    assert self._url is not None

    # the first alias key is the brewery name handed to read_page
    brewery_name = list(self._alias)[0]
    if self.read_page(brewery=brewery_name):
        return True

    for block in self._soup.find_all("div", {"class": "sqs-block-content"}):
        self.parse_beer(block)
    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Read the Cypress "currently available" page and parse each menu entry.

    Returns True when read_page reports a cached page (nothing is parsed),
    otherwise False once every ``menu-content-pro`` div has been parsed.
    """
    page_url = "http://cypressbrewing.com/beer_type/currently-available/"
    BreweryPage.fetch_taplist(self, url=page_url, **kwargs)
    assert self._url is not None

    # the first alias key is the brewery name handed to read_page
    brewery_name = list(self._alias)[0]
    if self.read_page(brewery=brewery_name):
        return True

    for entry in self._soup.find_all("div", {"class": "menu-content-pro"}):
        self.parse_beer(entry)
    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Read the traprockrestaurant.net menu page and parse its item names.

    Menu items whose text contains '32oz' are skipped.

    Returns True when read_page reports a cached page (nothing is parsed),
    otherwise False once the ``menu-item-name`` divs have been processed.
    """
    page_url = "http://www.traprockrestaurant.net/menus/8449"
    BreweryPage.fetch_taplist(self, url=page_url, **kwargs)
    assert self._url is not None

    # the first alias key is the brewery name handed to read_page
    brewery_name = list(self._alias)[0]
    if self.read_page(brewery=brewery_name):
        return True

    for item in self._soup.find_all("div", {"class": "menu-item-name"}):
        if '32oz' not in item.text:
            self.parse_beer(item)
    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Fetch an UnTappd menu script and scrape the beers embedded in it.

    The response is JavaScript that assigns an HTML string to
    ``container.innerHTML``; that string is cut out, unescaped and parsed.
    Only beers under the first ``section-name`` div are passed to
    parse_inner_content. When no ``beer`` divs exist the raw HTML goes to
    untied_parser instead.

    Returns True when read_page reports a cached page, otherwise False.
    """
    brewery = kwargs.get('brewery')
    assert brewery is not None

    # the first two BREWERY_INFO fields fill the location and theme slots
    info = BREWERY_INFO[brewery]
    url = "https://business.untappd.com/locations/{0}/themes/{1}/js".format(
        info[0], info[1])

    # let the base class do its pre-fetch initialization
    BreweryPage.fetch_taplist(self, url=url, **kwargs)
    assert self._url is not None

    if self.read_page(brewery):
        return True

    assert self._cached_response is not None
    script = self._cached_response

    # the menu HTML sits between the innerHTML assignment and the last
    # double quote that precedes the trailing '(function (){' block
    marker = 'container.innerHTML = "'
    menu_start = script.find(marker) + len(marker)
    trailer_pos = script.find('(function (){')
    menu_end = script.rfind('"', 0, trailer_pos)
    html_menu = script[menu_start:menu_end]

    # undo the escaping applied to the embedded HTML, in this order
    for escaped, plain in (('\\"', '"'), ('/\n', '\n'), ('\\/', '/')):
        html_menu = html_menu.replace(escaped, plain)

    self._soup = bs.BeautifulSoup(html_menu, "html.parser")
    assert self._soup is not None
    beer_divs = self._soup.find_all("div", {"class": "beer"})
    assert beer_divs is not None

    if not beer_divs:
        self.untied_parser(html_menu)
        return False  # not from the cache

    section_divs = self._soup.find_all("div", {"class": "section-name"})
    draft_section = section_divs[0].text
    for beer in beer_divs:
        owning_section = beer.find_previous("div", "section-name")
        if owning_section.text != draft_section:
            break  # left the first section
        assert beer is not None
        self.parse_inner_content(beer)
    return False  # not from the cache
def fetch_taplist(self, **kwargs) -> bool:
    """Fetch the Twin Elephant site and build the beer list from it.

    Each ``beer-holder`` div yields one Beer. Name and style come from the
    children classed ``beer-name`` and ``beer-style``; ABV and hops come from
    ``pure-u-1`` cells inside a ``pure-g`` child, whose text looks like
    "\\nABV: 6.2%\\n".

    Args:
        **kwargs: forwarded to ``BreweryPage.fetch_taplist``.

    Returns:
        True when ``read_page`` reports a cached page (nothing is parsed),
        False after the page has been scraped.
    """

    def css_classes(node):
        """Return node's class list, or () for text nodes and classless tags."""
        attrs = getattr(node, 'attrs', None)
        if not attrs:
            return ()
        return attrs.get('class') or ()

    # perform any pre-fetch initialization of base class
    BreweryPage.fetch_taplist(self, url="http://www.twinelephant.com", **kwargs)
    assert self._url is not None
    is_cached = self.read_page(brewery=list(self._alias.keys())[0])  # read the page
    if is_cached:
        return True

    for beer in self._soup.find_all("div", {"class": "beer-holder"}):
        name = style = abv = hops = None
        for content in beer.contents:
            classes = css_classes(content)
            if 'beer-name' in classes:
                name = content.text
            if 'beer-style' in classes:
                style = content.text
            if 'pure-g' not in classes:
                continue
            # ABV / hops grid. css_classes() tolerates children that have
            # no class attribute, which would otherwise raise KeyError.
            for cell in content.contents:
                if 'pure-u-1' not in css_classes(cell):
                    continue
                text = cell.text
                # keep only the value after the label, e.g. '6.2%'
                if 'ABV:' in text:
                    abv = text.split('ABV:')[1].strip()
                elif 'HOPS:' in text:
                    hops = text.split('HOPS:')[1].strip()

        # now add the beer to the list
        self.add_beer(Beer(name=name, style=style, abv=abv, hops=hops))

    # we now have a list of beers for this brewery
    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Read the Jersey Girl beers page and pass the tap list to find_beers.

    The spans are scanned for the 'On Tap in the Sample Room' heading. The
    first later span whose first child holds more than two child nodes is
    handed to find_beers. A ``mockedlist`` keyword argument replaces the
    scraped span list (testing injection) and skips read_page.

    Returns True only when read_page reports a cached page, otherwise False.
    """
    span_list = kwargs.get('mockedlist', None)  # testing injection

    # perform any pre-fetch initialization of base class
    BreweryPage.fetch_taplist(
        self, url="http://www.jerseygirlbrewing.com/beers.html", **kwargs)

    if span_list is None:
        if self.read_page(brewery=list(self._alias)[0]):
            return True
        span_list = self._soup.find_all("span")

    # locate the heading span that introduces the tap list
    heading_idx = None
    for position, span in enumerate(span_list):
        if 'On Tap in the Sample Room' in span.text:
            heading_idx = position
            break
    if heading_idx is None:
        return False

    # look for the first span after the heading that wraps the beers
    for candidate in span_list[heading_idx + 1:]:
        outer = getattr(candidate, 'contents', None)
        if not isinstance(outer, list) or not outer:
            continue
        inner = getattr(outer[0], 'contents', None)
        if isinstance(inner, list) and len(inner) > 2:
            self.find_beers(inner)
            return False

    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Fetch a beermenus.com menu widget and scrape its on-tap table.

    The response is JavaScript that assigns an HTML string to
    ``widgetDiv.innerHTML``; that string is cut out, unescaped and parsed.
    Each row of the ``on_tap-section`` table with more than three child
    nodes becomes a Beer (name and ABV only).

    Args:
        **kwargs: must contain ``brewery``, which is used to index
            ``BREWERY_INFO``; every keyword argument is also forwarded to
            ``BreweryPage.fetch_taplist``.

    Returns:
        True when ``read_page`` reports a cached page (nothing is parsed),
        False after the freshly read page has been scraped.

    Raises:
        KeyError: ``brewery`` is missing from ``kwargs``.
        AssertionError: there is no cached response, or the ``;\\n}``
            terminator is not found in it.
    """
    brewery = kwargs['brewery']

    # construct our URL
    url = "https://beermenus.com/menu_widgets/{0}".format(
        BREWERY_INFO[brewery][0])
    BreweryPage.fetch_taplist(self, url=url, **kwargs)

    is_cached = self.read_page(brewery=brewery)  # read the page
    if is_cached:
        return True

    assert self._cached_response is not None
    start_string = 'widgetDiv.innerHTML = \'\\n'
    start_pos = self._cached_response.find(start_string)
    end_pos = self._cached_response.rfind(';\n}')
    # Compare by value, and do it before end_pos is used: `is not -1` tests
    # object identity and only works through CPython's small-int cache.
    assert end_pos != -1
    html_menu = self._cached_response[start_pos + len(start_string):end_pos - 1]

    # undo the escaping applied to the embedded HTML
    html_menu = html_menu.replace('\\"', '"')
    html_menu = html_menu.replace('\\n', '\n')
    html_menu = html_menu.replace('\\/', '/')

    self._soup = bs.BeautifulSoup(html_menu, "html.parser")
    assert self._soup is not None

    taplist_table = self._soup.find('table', attrs={'class': 'on_tap-section'})
    table_body = taplist_table.find('tbody')
    rows = table_body.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        if len(row.contents) > 3:
            # the name is the second line of the first cell's stripped text;
            # the second cell holds the ABV without its percent sign
            self.add_beer(
                Beer(name=cols[0].text.strip().split('\n')[1],
                     style=None,
                     abv=cols[1].text.strip() + '%',
                     ibu=None,
                     desc=None))
    return False  # not from cache
def __init__(self, **kwargs):
    """Initialise the base page, then install the alias table.

    This table covers several breweries: each key is a brewery name and each
    value lists the alternate names registered for it.
    """
    BreweryPage.__init__(self, **kwargs)

    fort_nonsense = [
        "Fort Nonsense Brewing",
        "Fort Nonsense Brewery",
        "fortnite incense",
    ]
    alementary = [
        "Alementary Brewing",
        "Alementary Brewery",
        "elementary",
        "elementary brewing",
        "elementary brewery",
    ]
    man_skirt = [
        "Man Skirt Brewing",
        "Man Skirt Brewery",
        "man's skirt",
        "mansker",
    ]
    pinelands = [
        "Pinelands Brewing",
        "Pinelands Brewery",
    ]
    untied = [
        "Untied Brewing",
        "Untied Brewery",
        "United",
        "United Brewing",
        "United Brewery",
        "Untied Browing",
        "Untied Brewing Company",
    ]

    # insertion order is kept the same as before
    self._alias = {
        "Fort Nonsense": fort_nonsense,
        "Alementary": alementary,
        "Man Skirt": man_skirt,
        "Pinelands": pinelands,
        "Untied": untied,
    }
def fetch_taplist(self, **kwargs) -> bool:
    """Read the Departed Soles beer page and build the beer list from it.

    Every ``beersamples`` div yields one Beer. When the div's second child
    is an ``h4``, it supplies the name and the fourth child's text is split
    on a bullet character into style and ABV; otherwise those stay None.

    Returns True when read_page reports a cached page (nothing is parsed),
    otherwise False.
    """
    BreweryPage.fetch_taplist(self,
                              url="http://www.departedsoles.com/beer.html",
                              **kwargs)
    assert self._url is not None

    # the first alias key is the brewery name handed to read_page
    brewery_name = list(self._alias)[0]
    if self.read_page(brewery=brewery_name):
        return True

    for sample in self._soup.find_all("div", {"class": "beersamples"}):
        name = style = abv = None
        heading = sample.contents[1]
        if heading.name == 'h4':
            name = heading.text
            # "<style> • <abv>" separated by U+2022
            details = sample.contents[3].text.split(u'\u2022')
            style = details[0].strip()
            abv = details[1].strip()

        # now add the beer to the list
        self.add_beer(Beer(name=name, style=style, abv=abv, hops=None))

    # we now have a list of beers for this brewery
    assert self._beer_list is not None
    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Fetch the Kane Brewing home page and parse the beers listed on it.

    We only display "On Premise" beers and not the ones that are permanent:
    parsing stops at the second tag whose class list contains
    ``panel-two-column``.

    Args:
        **kwargs: forwarded to ``BreweryPage.fetch_taplist``.

    Returns:
        True when ``read_page`` reports a cached page (nothing is parsed),
        False after the page has been scraped.
    """
    BreweryPage.fetch_taplist(self, url="http://www.kanebrewing.com", **kwargs)
    assert self._url is not None
    is_cached = self.read_page(brewery=list(self._alias.keys())[0])  # read the page
    if is_cached:
        return True

    # NOTE(review): KanePage.filter presumably selects the "panel" and
    # "product" divs -- confirm against its definition.
    tag_list = self._soup.find_all(KanePage.filter)

    panel_idx = 0
    for tag in tag_list:
        if "panel-two-column" in tag['class']:
            panel_idx += 1
            if panel_idx > 1:
                break  # second panel reached: stop parsing
            continue

        beer_name = tag.contents[1].text.strip("\n ")
        beer_style = KanePage.parse_beer_style(tag)
        beer_abv = KanePage.parse_beer_abv(tag)
        # Reset per tag so a product without a description block neither
        # hits an unbound name nor inherits the previous beer's description.
        beer_desc = None
        if len(tag.contents) > 5:
            beer_desc = tag.contents[5].text.strip("\n ")
        self.add_beer(
            Beer(name=beer_name,
                 style=beer_style,
                 abv=beer_abv,
                 desc=beer_desc,
                 hops=None))
    return False  # not from cache
def fetch_taplist(self, **kwargs) -> bool:
    """Read the Angry Erik services page and parse the underlined beer names.

    Spans are walked in document order. A span whose first child is a ``u``
    tag with non-empty text is passed to parse_beer_text. Scanning stops at
    the first span whose text contains '**'.

    Returns True when read_page reports a cached page (nothing is parsed),
    otherwise False.
    """
    BreweryPage.fetch_taplist(self,
                              url="http://www.angryerik.com/services.html",
                              **kwargs)
    assert self._url is not None

    # the first alias key is the brewery name handed to read_page
    brewery_name = list(self._alias)[0]
    if self.read_page(brewery=brewery_name):
        return True

    for span in self._soup.find_all("span"):
        if '**' in span.text:
            return False
        children = span.contents
        if not children:
            continue
        first_child = children[0]
        if getattr(first_child, 'name', None) != 'u':
            continue
        beer_name = first_child.text
        if beer_name:
            self.parse_beer_text(beer_name)
    return False  # not from cache
def __init__(self, **kwargs):
    """Initialise the base page, then install the Cypress alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Cypress Brewing",
        "Cypress Brewery",
    ]
    self._alias = {"Cypress": alternate_names}
def __init__(self, **kwargs):
    """Initialise the base page, then install the Kane alias table."""
    BreweryPage.__init__(self, **kwargs)

    # brewery name -> alternate names registered for it
    alternate_names = [
        "Kane Brewing",
        "Kane Brewery",
    ]
    self._alias = {"Kane": alternate_names}