def fetchPlayer(line):
    """Parse one line of wiki markup into a ``soccer.Player``.

    A line is only considered when it contains one of the squad-player
    template markers ``{{fs player``, ``{{football squad player`` or
    ``{{fs2 player`` (matched case-insensitively).  The line is then
    passed through ``wikiutils.unlink_wiki``, stripped of braces, split
    on ``|`` and every ``key=value`` column is inspected for the keys
    ``no``, ``nat``, ``pos``, ``name``, ``first`` and ``last``.

    Args:
        line: a single line of wiki text.  It is ``.encode('utf-8')``-ed
            when a parse problem is written to ``Globals.errlog``.

    Returns:
        ``soccer.Player(name, number, pos, nationality)`` when both a
        position and a name (either ``name`` or ``first`` + ``last``)
        were found; ``None`` otherwise.  A missing or non-numeric number
        becomes ``0`` and a missing nationality becomes ``'NA'``.
    """
    def playerError(msg):
        # Python 2 "print chevron" form: writes to the shared error log.
        print >> Globals.errlog, "Player %s: %s" % (line.encode('utf-8'), msg.encode('utf-8'))

    lowered = line.lower()
    templateMarkers = ('{{fs player', '{{football squad player', '{{fs2 player')
    if not any(marker in lowered for marker in templateMarkers):
        return None

    unlinked = wikiutils.unlink_wiki(line)
    columns = [s.strip() for s in unlinked.replace('{', '').replace('}', '').split('|')]

    number = None
    fields = {}
    for column in columns:
        if '=' not in column:
            continue
        try:
            k, v = wikiutils.getKeyValue(column)
        except ValueError:
            playerError("Couldn't parse player information column: %s" % column)
            continue

        if k == 'no':
            try:
                number = int(v)
            except (UnicodeEncodeError, ValueError):
                pass  # usually dash as a player number
        elif k in ('nat', 'pos'):
            fields[k] = v
        elif k in ('name', 'first', 'last'):
            # unlinkify returns a pair; only its first element is used here.
            fields[k] = wikiutils.unlinkify(v)[0]

    name = fields.get('name')
    if not name and fields.get('first') and fields.get('last'):
        name = fields['first'] + ' ' + fields['last']

    pos = fields.get('pos')
    if not (pos and name):
        return None

    nationality = fields.get('nat') or 'NA'
    return soccer.Player(name, number or 0, pos, nationality)
def _kitIndex(suffix):
    """Return the zero-based kit index (0 or 1) encoded by the first character of *suffix*, or None."""
    if not suffix:
        return None
    try:
        n = int(suffix[0]) - 1
    except ValueError:
        # Key suffix does not start with a number; not a numbered kit key.
        return None
    if n == 0 or n == 1:
        return n
    return None


def parseTeam(team, rvtext, mayGetTemplates):
    """Build a ``soccer.Team`` from the wiki text of a team page.

    The text is scanned line by line for three kinds of information:

    * players, via ``playerparser.fetchPlayer``; when a squad heading is
      seen and ``mayGetTemplates`` is true, the next template named on a
      line is fetched with ``wikiutils.getPage('Template:' + name)`` and
      parsed with ``playerparser.fetchPlayers``;
    * the league position, from a ``|position=`` line that mentions
      neither "promoted" nor "relegated";
    * kit colors, from lines matched by ``kitinfo_re``, using the keys
      ``bodyN``, ``shortsN`` and ``socksN`` with N being 1 or 2.

    Args:
        team: team name; handed unchanged to ``soccer.Team``.
        rvtext: wiki text of the page; split on ``'\\n'``.
        mayGetTemplates: when false, squad headings never enable the
            template lookup.

    Returns:
        ``soccer.Team(team, kit, teamposition, players)``, or ``None``
        when fewer than 15 players were collected.  ``teamposition`` is
        ``0`` when no position was found.
    """
    # (key prefix, soccer.Kit attribute) for the color keys that are applied.
    kitColorAttrs = (
        ('body', 'bodycolor'),
        ('shorts', 'shortscolor'),
        ('socks', 'sockscolor'),
    )

    players = []
    teamposition = None
    kit = [soccer.Kit(), soccer.Kit()]
    finishedReadingPlayers = False
    lookForSquadTemplate = False

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())

        if not finishedReadingPlayers:
            p = playerparser.fetchPlayer(line)
            if p:
                players.append(p)
            else:
                heading = wikiutils.getHeading(line)
                if heading:
                    h = heading.lower()
                    isSquadHeading = 'current squad' in h or ('first' in h and 'squad' in h)
                    # Both heading variants are gated by mayGetTemplates;
                    # without the parentheses "and" would bind only to the
                    # first variant.
                    lookForSquadTemplate = bool(mayGetTemplates and isSquadHeading)
                elif lookForSquadTemplate:
                    t = wikiutils.getTemplate(line)
                    text = wikiutils.getPage('Template:' + t) if t else None
                    if text:
                        players = playerparser.fetchPlayers(text)
                        if len(players) > 15:
                            finishedReadingPlayers = True

            if playerparser.endOfPlayerList(line):
                finishedReadingPlayers = True

        if lineWithoutSpaces.startswith("|position="):
            # this seems to usually be either this or last season's position
            lowered = lineWithoutSpaces.lower()
            if 'promoted' not in lowered and 'relegated' not in lowered:
                tp = wikiutils.getNumberKeyValue(line)
                if tp:
                    teamposition = tp

        # A line may carry more than one kit part; every matching column of
        # the line is processed once, regardless of how many regex hits the
        # line produced.
        if kitinfo_re.findall(line):
            columns = [x.strip() for x in line.split('|')
                       if 'body' in x or 'shorts' in x or 'socks' in x or 'pattern_b' in x]
            for c in columns:
                try:
                    k, v = wikiutils.getKeyValue(c)
                except Exception:
                    # Best effort: skip columns that cannot be parsed.
                    continue
                for prefix, attr in kitColorAttrs:
                    if k.startswith(prefix):
                        n = _kitIndex(k[len(prefix):])
                        if n is not None:
                            setattr(kit[n], attr, getColorValue(v))
                        break
                # TODO: body type, second color (keys starting with
                # 'pattern_b', other than 'pattern_blue', are not used yet)

    if len(players) < 15:
        return None
    if not teamposition:
        teamposition = 0
    return soccer.Team(team, kit, teamposition, players)
def getLeagueData(rvtext, leaguedata):
    """Extract league facts from wiki text and fill the gaps in *leaguedata*.

    Two sources inside the text are read:

    * template parameters on lines starting (ignoring whitespace) with
      ``|current=``, ``|divisions=``/``|division=``, ``|levels=``/
      ``|level=``, ``|relegation=`` and ``|teams=``;
    * a wiki table opened by ``{| class="infobox football"`` (e.g.
      Regionalliga_Nord), where a row containing "background" selects
      which value the following rows provide.  Scanning stops at the
      closing ``|}`` of that table.

    Args:
        rvtext: wiki text of the league page; split on ``'\\n'``.
        leaguedata: object with the attributes ``season``,
            ``relegationleagues``, ``numteams``, ``divisions`` and
            ``levelnum``.  Only attributes that are currently falsy are
            overwritten.

    Returns:
        None; results are written to *leaguedata*.
    """
    class InfoboxState:
        Outside = 0            # not inside the infobox table
        Entered = 1            # inside, no value row expected
        RelegationLeagues = 2  # following rows list relegation leagues
        NumTeams = 3           # following row holds the number of clubs
        NumLevel = 4           # following row holds the level
        Season = 5             # following row links the current season

    # Checked in this order against the lower-cased header text.
    headerStates = (
        ('relegation', InfoboxState.RelegationLeagues),
        ('number of clubs', InfoboxState.NumTeams),
        ('level', InfoboxState.NumLevel),
        ('current season', InfoboxState.Season),
    )

    season = ''
    relegationleagues = dict()
    numteams = 0
    levelnum = 0
    divisions = 0
    ibs = InfoboxState.Outside

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())

        if not season and lineWithoutSpaces.startswith("|current="):
            _, v = wikiutils.getKeyValue(line)
            _, competitionlink = wikiutils.unlinkify(v)
            if competitionlink:
                season = competitionlink

        if not divisions and lineWithoutSpaces.startswith(("|divisions=", "|division=")):
            tp = wikiutils.getNumberKeyValue(line)
            if tp:
                divisions = tp

        if not levelnum and lineWithoutSpaces.startswith(("|levels=", "|level=")):
            tp = wikiutils.getNumberKeyValue(line)
            if tp:
                levelnum = tp

        if not relegationleagues and lineWithoutSpaces.startswith("|relegation="):
            _, v = wikiutils.getKeyValue(line)
            for part in br_re.split(v):
                _, cl = wikiutils.unlinkify(part.strip())
                if cl:
                    relegationleagues[cl] = cl

        if not numteams and lineWithoutSpaces.startswith('|teams='):
            # Guarded like divisions/levelnum so a falsy result never
            # replaces the integer default.
            tp = wikiutils.getNumberKeyValue(line)
            if tp:
                numteams = tp

        if ibs == InfoboxState.Outside:
            if lineWithoutSpaces.startswith('{|class="infoboxfootball"'):
                # e.g. Regionalliga_Nord
                ibs = InfoboxState.Entered
            continue

        # Inside the infobox: only rows starting with '|' matter.
        if not lineWithoutSpaces or lineWithoutSpaces[0] != '|':
            continue

        text = '|'.join(line.split('|')[2:])
        if not text:
            if lineWithoutSpaces[0:2] == '|}':
                # End of the infobox table: stop scanning the page.
                ibs = InfoboxState.Outside
                break
            continue

        t, link = wikiutils.unlinkify(text)
        tl = t.lower()

        if 'background' in line:
            # Header row: choose what the following rows mean.
            ibs = InfoboxState.Entered
            for keyword, state in headerStates:
                if keyword in tl:
                    ibs = state
                    break
        elif ibs == InfoboxState.RelegationLeagues:
            if link:
                relegationleagues[link] = link
            else:
                ibs = InfoboxState.Entered
        elif ibs == InfoboxState.NumTeams:
            digits = re.findall(r'\d+', t)
            if digits:
                numteams = int(digits[0])
        elif ibs == InfoboxState.NumLevel:
            digits = re.findall(r'\d+', t)
            if digits:
                levelnum = int(digits[0])
        elif ibs == InfoboxState.Season:
            if link:
                season = link
            else:
                ibs = InfoboxState.Entered

    # Only fill in what the caller does not already know.
    if not leaguedata.season:
        leaguedata.season = season
    if not leaguedata.relegationleagues:
        leaguedata.relegationleagues = relegationleagues
    if not leaguedata.numteams:
        leaguedata.numteams = numteams
    if not leaguedata.divisions:
        leaguedata.divisions = divisions
    if not leaguedata.levelnum:
        leaguedata.levelnum = levelnum