#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Download the Untappd venue page of JBM Brew Lab Pub and hand it to
``beerlib.process_untappd`` together with the command-line arguments.

Exits with status -1 when the download yields nothing.
"""
from xml.etree import ElementTree as ET
import re, sys
import common as beerlib

html = beerlib.download_html('https://untappd.com/v/jbm-brew-lab-pub/4222393')
if not html:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(-1)

beerlib.process_untappd(html, 'JBM Brew Lab Pub', sys.argv)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Download http://ochutnavkovapivnice.cz/prave_na_cepu/, parse the HTML
table found in it and pass the rows to ``beerlib.parser_output``.

Exit status:
    -1  the page could not be downloaded
    -2  no ``<table>...</table>`` span was found in the page
"""
from xml.etree import ElementTree as ET
import re, sys
import common as beerlib

html = beerlib.download_html('http://ochutnavkovapivnice.cz/prave_na_cepu/')
if not html:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(-1)

# Greedy on purpose as in the original: with DOTALL this spans from the first
# "<table" to the last "</table>" of the page.
reg = re.compile('(<table.*</table>)', re.MULTILINE | re.DOTALL)
html = reg.search(html)
if not html:
    sys.exit(-2)
html = html.group(0)

# Escape every ampersand so that bare '&' characters and HTML-only entities
# (e.g. &nbsp;), which are undefined in XML, do not make ET.XML() raise.
# The previous replace('&', '&') was a no-op.
html = html.replace('&', '&amp;')

table = ET.XML(html)
rows = iter(table)

# The first row carries the column titles; each title is the text of the
# first child element of its cell.
headers = [col[0].text for col in next(rows)]

output = []
for row in rows:
    tds = iter(row)
    # The beer name sits two elements deep inside the first cell; the
    # remaining cells contribute their own text directly.
    beer = next(tds)[0][0].text
    output.append([beer] + [col.text for col in tds])

beerlib.parser_output(output, headers, 'Ochutnávková pivnice', sys.argv)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Downloads https://maltworm.cz/dnes-na-cepu/, strips <script> content from
# the <body> and collects one list of values per <article> element.
# NOTE(review): this chunk ends inside the article loop; the remainder of the
# script is outside the reviewed range.
import re, sys
from xml.etree import ElementTree as ET
import common as beerlib

html = beerlib.download_html('https://maltworm.cz/dnes-na-cepu/')
if not html:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(-1)

reg = re.compile('(<body.*</body>)', re.MULTILINE | re.DOTALL)
body = reg.search(html)
if not body:
    # Previously .group(0) was called on the search result unconditionally,
    # raising AttributeError when the page had no <body>...</body> span.
    sys.exit(-2)
body = body.group(0)

# Greedy with DOTALL: removes everything from the first "<script" up to the
# last "</script>", including any markup between separate script elements.
content = re.sub('<script.*</script>', '', body, flags=re.MULTILINE | re.DOTALL)
table = ET.XML(content)
articles = table.findall(".//article")

headers = ['Pivo', 'Typ', 'EPM', 'Alk.', 'IBU', 'Pivovar', 'Město']
output = []
for article in articles:
    beer = article.find(
        ".//p[@class='elementor-heading-title elementor-size-default']")
    info = article.findall(".//span[@class='elementor-icon-list-text']")
    info = iter(info)
    # Heading text first, then the full text (including nested elements) of
    # every icon-list span.
    values = [beer.text] + ["".join(i.itertext()) for i in info]
    # get rid of 'IBU:' prefix
div = checkin.find('.//div[@data-rating]') if div is not None: return div.get('data-rating') return None if len(sys.argv) != 3: print("uuparser requires two arguments: untappd_username irc_nick") exit(-1) cwd = os.path.dirname(os.path.realpath(__file__)) CACHE_FILE = os.path.join(cwd, '../cache/untappd.json') USER = sys.argv[1] IRC_NICK = sys.argv[2] html = beerlib.download_html('https://untappd.com/user/' + USER) if not html: exit(-2) reg = re.compile('(<div id="main-stream.*?)<script', re.MULTILINE | re.DOTALL) body = reg.search(html) if not body: exit(-3) body = body.group(1) content = re.sub('<script.*?</script>', '', body, flags=re.MULTILINE | re.DOTALL) content = re.sub('<img.*?>', '', content, flags=re.MULTILINE | re.DOTALL) content = content.replace('&', '&')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Download the Untappd venue page of F.A. Bar Oranžová and hand it to
``beerlib.process_untappd`` together with the command-line arguments.

Exits with status -1 when the download yields nothing.
"""
import sys
import common as beerlib

html = beerlib.download_html('https://untappd.com/v/fa-bar-oranzova/1728532')
if not html:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(-1)

beerlib.process_untappd(html, 'F.A. Bar Oranžová', sys.argv)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Fetches the post list of one Facebook page, extracts the post ids from it
# and downloads each post in turn, collecting its <p>...</p> paragraphs.
# NOTE(review): this chunk ends inside the post loop; the remainder of the
# script is outside the reviewed range.
import sys, re, json
from xml.etree import ElementTree as ET
import common as beerlib

curl_ua = 'curl/7.54.1'

# The page id is needed both for the listing URL and for every post URL;
# keep it in one place so the two cannot drift apart.
PAGE_ID = '1871132519814729'

# first we need the post ID
html = beerlib.download_html(
    'https://m.facebook.com/page_content_list_view/more/'
    '?page_id=%s&start_cursor=10000&num_to_fetch=10&surface_type=timeline'
    % PAGE_ID,
    curl_ua)
if not html:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(-1)

# Non-greedy and without DOTALL: matches paragraphs that fit on one line.
reg = re.compile('(<p>.*?</p>)')

# find post ids
ids = re.findall('top_level_post_id":"([0-9]+)', html)

# Look at all articles until some beers are found
for content_id in ids:
    post_url = "https://m.facebook.com/story.php?story_fbid=%s&id=%s" % (
        content_id, PAGE_ID)

    # Okay, let's get the post
    post_html = beerlib.download_html(post_url, curl_ua)
    if not post_html:
        continue

    paragraphs = reg.findall(post_html)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Downloads the mobile Facebook page of Craftbeerbottleshopbar, parses its
# <body> as XML and builds a story URL for every <article> that carries a
# 'data-ft' attribute.
# NOTE(review): this chunk ends inside the article loop; the remainder of the
# script is outside the reviewed range.
import sys, re, json
from xml.etree import ElementTree as ET
import common as beerlib

# first we need the post ID
html = beerlib.download_html('https://m.facebook.com/Craftbeerbottleshopbar/')
if not html:
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(-1)

reg = re.compile('(<body.*</body>)', re.MULTILINE | re.DOTALL)
body = reg.search(html)
if not body:
    # Previously .group(0) was called on the search result unconditionally,
    # raising AttributeError when the page had no <body>...</body> span.
    sys.exit(-2)
body = body.group(0)

page = ET.XML(body)
# The original also carried a commented-out alternative selector:
# ".//div[@role='article']".
articles = page.findall(".//article")

# Look at all articles until some beers are found
for article in articles:
    # 'data-ft' holds a JSON document; articles without it are skipped.
    data_element = article.get('data-ft')
    if not data_element:
        continue

    data = json.loads(data_element)
    post_url = "https://m.facebook.com/story.php?story_fbid=%s&id=%s" % (
        data['top_level_post_id'], data['content_owner_id_new'])