# Parse the group standings page into TeamStat records and pickle them.
#
# Reads raw/standings.html (resolved through getInFile), looks up the eight
# tables whose summary attribute is "Group Group A" .. "Group Group H",
# builds one TeamStat per table row and marshals the resulting list to
# parsed/gss.pkl.
from globals import TeamStat, getInFile, marshal
import string
from lxml import etree
from whohas import whohas

tree = etree.parse(getInFile('raw/standings.html'), etree.HTMLParser())

standings = []
for letter in string.ascii_uppercase[:8]:
    # One table per group; [0] raises IndexError if the table is missing.
    table = tree.xpath('//table[@summary="Group Group %c"]' % (letter))[0]
    for row in table.findall('./tbody/tr'):
        # list(row) replaces the deprecated row.getchildren(). Exactly eight
        # child cells are required, otherwise the unpacking raises ValueError.
        (tdTeam, tdPlayed, tdWins, tdDraws, tdLosses,
         tdGoalsFor, tdGoalsAgainst, tdPoints) = list(row)

        # The flag image carries both the team code (in its file name) and
        # the team name (in its title), so look it up only once.
        flag = tdTeam.find('.//img')

        ts = TeamStat()
        ts.group = letter
        # The three characters before the 4-character file extension of the
        # image src are used as the team code.
        ts.teamCode = flag.get('src')[-7:-4].upper()
        # The title is re-encoded to Latin-1 and decoded as UTF-8; presumably
        # the parser hands back UTF-8 bytes mis-decoded as Latin-1 -- confirm.
        # Equivalent to unicode(title.encode('latin1'), 'utf-8') on Python 2,
        # but does not depend on the Python-2-only unicode() builtin.
        ts.teamName = flag.get('title').encode('latin1').decode('utf-8')
        ts.played = int(tdPlayed.text)
        ts.wins = int(tdWins.text)
        ts.draws = int(tdDraws.text)
        ts.losses = int(tdLosses.text)
        ts.goalsFor = int(tdGoalsFor.text)
        ts.goalsAgainst = int(tdGoalsAgainst.text)
        ts.goalsDiff = ts.goalsFor - ts.goalsAgainst
        ts.points = int(tdPoints.text)
        standings.append(ts)

marshal(standings, 'parsed/gss.pkl')
sys.exit(0) # MatchStat exists, abort parsing. except IOError: pass # file not found means go on, the MatchStat does not yet exist! assert(ms.number == int(div.xpath('./div[@class="footer"]/div[@class="info"]/span')[0].text[6:])) ms.group = div.xpath('./div[@class="footer"]/div[@class="info"]/span')[1].text ms.group = re.search('\w[\w -]+\w', ms.group).group() if 'GROUP ' in ms.group.upper(): ms.group = ms.group[6:].upper() whdate = div.xpath('./div[@class="footer"]/div[@class="info"]/span')[2].text whtime = div.xpath('./div[@class="match"]/div[@class="time"]')[0].text ms.when = re.search('\d\d Ju[nl][ey]', whdate).group() + ' ' + re.search('\d?\d:\d\d', whtime).group() ms.homeCode = div.xpath('./div[@class="match"]/div[@class="teamH"]/div[@class="flag"]/a/img')[0].get('src')[-7:-4].upper() ms.homeName = div.xpath('./div[@class="match"]/div[@class="teamH"]/div[@class="name"]/a')[0].text ms.awayCode = div.xpath('./div[@class="match"]/div[@class="teamA"]/div[@class="flag"]/a/img')[0].get('src')[-7:-4].upper() ms.awayName = div.xpath('./div[@class="match"]/div[@class="teamA"]/div[@class="name"]/a')[0].text assert( ms.homeCode.isalpha() ) assert( ms.awayCode.isalpha() ) assert( ms.homeName[0].isalpha() ) assert( ms.awayName[0].isalpha() ) assert( ms.homeName[-1].isalpha() ) assert( ms.awayName[-1].isalpha() ) ms.hasResults = False marshal(ms,'parsed/match%02d.pkl'%(ms.number))
# Store the parsed results on the MatchStat, print a short summary and
# persist the record to parsed/match<NN>.pkl.
ms.goalsCode = goals
ms.homeGoals = homeGoals
ms.awayGoals = awayGoals
ms.homeCards = homeCards
ms.awayCards = awayCards
ms.hasResults = True

# Score line is derived from the number of entries in each goal list.
res = "%d:%d" % (len(homeGoals), len(awayGoals))

# The smallest entry of goals is formatted as the first goal; entries are
# presumably (minute, description) pairs -- confirm against the code that
# builds goals. min() raises ValueError on an empty sequence, and a missing
# or malformed entry raises TypeError; both fall back to the placeholder.
# The former bare "except:" also swallowed KeyboardInterrupt and SystemExit.
try:
    fg = "%d' -- %s" % min(goals)
except (ValueError, TypeError):
    fg = "(no goals)"

# Entries [0] and [1] of each side's cards are shown as "x/y" and the [2]
# entries are summed as the total (presumably yellow/red counts and a
# per-side total -- confirm).
ycrc = "%d/%d : %d/%d -- total: %d" % (
    homeCards[0],
    homeCards[1],
    awayCards[0],
    awayCards[1],
    homeCards[2] + awayCards[2],
)

# A single parenthesised argument prints identically with the Python 2
# print statement and the Python 3 print function.
print(
    "Results of match %d:\n %s:%s %s\n yc/rc: %s\n first goal: %s"
    % (ms.number, ms.homeCode, ms.awayCode, res, ycrc, fg)
)

marshal(ms, "parsed/match%02d.pkl" % (ms.number))