Exemplo n.º 1
0
"""Extract pricing information from tcgplayer.com"""


import re
import urllib

import miner


# Host that the module-level connection below is created for.
server = "magic.tcgplayer.com"
# Path template of a price guide page; ``%s`` is replaced by the set name.
url_price = "/db/price_guide.asp?setname=%s"

# Captures the text of a ``default_7`` font cell whose content starts with
# ``&nbsp;`` (mine_pricelist reads the card name from it).
re_name = re.compile(
    r"<font\s+class=default_7>"
    r"&nbsp;([^<]*?)"
    r"</font>"
)
# Captures the decimal amount of a ``default_7`` font cell of the form
# ``$<digits>.<digits>&nbsp;``.
re_price = re.compile(
    r"<font\s+class=default_7>"
    r"\$(\d+\.\d+)&nbsp;"
    r"</font>"
)

# Module-level connection to `server`, created once at import time;
# mine_pricelist passes it to miner.download for every request.
con = miner.new_connection(server)


def mine_pricelist(setname):
    """Get the average price for a card"""
    url = urllib.quote(url_price % setname, "/?=")
    html = miner.download(con, url, convert_to_unicode=False)
    pricelist = []
    for part in html.split("<TR height=20>"):
        match = re_name.search(part)
        if match is None:
            continue
        cardname = unicode(match.group(1), errors="ignore")
        cardname = cardname.replace(u"AE", u"\xc3")
        prices = re_price.findall(part)
        if prices is None or len(prices) < 3:
Exemplo n.º 2
0
# Matches one card entry on a set page.
#
# Every fragment is a raw string so that regex escapes such as \s, \d and \(
# reach the regex engine unchanged instead of relying on Python passing
# unknown string escapes through (deprecated in Python 3).
#
# Capture groups, in order (meanings other than the pure markup position are
# inferred from the page layout -- confirm against a live page):
#   1  link text inside the <span>            (presumably the card name)
#   2  text of the <i> following the <img>    (presumably the rarity)
#   3  leading text of the next paragraph     (presumably the type line)
#   4  optional value before the "/"          (presumably power)
#   5  optional value before the ","          (presumably toughness/loyalty)
#   6  optional symbols after the ","         (presumably the mana cost)
#   7  content of <b> in the "ctext" paragraph
#   8  content of the following <i>           (may span several lines)
#   9  text after "Illus."                    (the illustrator)
# NOTE(review): the "." in "Illus." is unescaped and matches any character;
# left as-is so matching behaviour does not change.
re_set3 = re.compile(
	r'<span[^>]*>\s*<a\s+href="[^"]*">([^<]+)</a>\s*</span>'
	r'\s*<p>\s*<img[^>]*>[^<]*<i>([^<]*)</i>\s*</p>\s*'
	r'<p>(.*?)(?:\s([\d\*X]+)/|)([\d\*X]+|),(?:\s+([\d\{\}/WUBRGXYZP]+)\s*'
	r'(?:\(\d+\)|)|)\s*</p>\s*<p\s+class="ctext">\s*<b>(.*?)</b>\s*</p>\s*'
	r'<p>\s*<i>([\d\D]*?)</i>\s*</p>\s*<p>Illus.\s+([^<]*)</p>'
)
re_token = re.compile(r'<h2>([^<]*)</h2>') # set name
# Matches one table row whose second cell is the literal text "Token".
# Groups: link, name, optional power, optional toughness, optional number
# (None when the cell is "-"), artist.
re_token2 = re.compile(
	r'<tr[^>]*>\s*'
	r'<td>\s*<a\s+href="([^"]*)">(.*?)' # link and name
	r'(?:\s+([\d\*X]+)/([\d\*X]+)|)</a>\s*</td>\s*' # power and toughness
	r'<td>Token</td>\s*' # token description
	r'<td>(?:([\d\*X]+[a-z]?)/[\d\*X]+|-)</td>\s*' # number
	r'<td>([^<]*)</td>\s*' # artist
	r'</tr>'
)

# Module-level connection to `server`, created once at import time;
# mine_set passes it to miner.download.
con = miner.new_connection(server) # httplib.HTTPConnection



def mine_set(setcode, releasedate, magiccardsinfocode):
	"""Mine the date for a magic set"""
	html = miner.download(con, url_set % magiccardsinfocode)
	
	# Get set name
	result = re_set.search(html)
	if result is None:
		raise RuntimeError(_("Pattern match failed."))
	setname, code = result.groups()
	
	# Get set cards
	cids = re_set2.findall(html)