def parseTeam(team, rvtext, mayGetTemplates):
    """Build a soccer.Team from the wikitext of a club article.

    The text is scanned line by line for three kinds of information:

    * squad members, via playerparser.fetchPlayer, or - when a squad
      heading is followed by a template reference - via the template page
      fetched with wikiutils.getPage;
    * the league position from a "|position=" infobox line (lines that
      mention "promoted" or "relegated" are ignored);
    * kit colours from lines matched by kitinfo_re.

    Parameters:
        team: team name; passed through unchanged to soccer.Team.
        rvtext: wikitext of the article; split on '\\n'.
        mayGetTemplates: when false, squad templates are never looked up.

    Returns:
        A soccer.Team, or None when fewer than 15 players were found.
        The position defaults to 0 when none could be parsed.
    """
    players = []
    teamposition = None
    kit = [soccer.Kit(), soccer.Kit()]
    finishedReadingPlayers = False
    lookForSquadTemplate = False

    # Key prefix -> Kit attribute receiving the colour. The prefixes are
    # mutually exclusive, so at most one entry matches a given key.
    kitColorAttrs = (
        ('body', 'bodycolor'),
        ('shorts', 'shortscolor'),
        ('socks', 'sockscolor'),
    )

    def teamError(msg):
        # Diagnostic helper (Python 2 print-chevron syntax). It is not
        # called anywhere inside this function at present.
        print >> Globals.errlog, "Team %s: %s" % (
            team.encode('utf-8'), msg.encode('utf-8'))

    def kitIndex(suffix):
        """Map the tail of a kit key ('1...' or '2...') to index 0 or 1.

        Returns None for an empty tail, a non-numeric first character or
        any other kit number, so that such keys are skipped.
        """
        if not suffix:
            return None
        try:
            n = int(suffix[0]) - 1
        except ValueError:
            return None
        if n == 0 or n == 1:
            return n
        return None

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())

        if not finishedReadingPlayers:
            p = playerparser.fetchPlayer(line)
            if p:
                players.append(p)
            else:
                heading = wikiutils.getHeading(line)
                if heading:
                    h = heading.lower()
                    # mayGetTemplates must gate *both* heading forms.
                    # Without the outer parentheses, "and" binds tighter
                    # than "or" and a "First ... squad" heading would
                    # enable the template fetch unconditionally.
                    lookForSquadTemplate = bool(
                        mayGetTemplates and
                        ('current squad' in h or
                         ('first' in h and 'squad' in h)))
                elif lookForSquadTemplate:
                    t = wikiutils.getTemplate(line)
                    if t:
                        text = wikiutils.getPage('Template:' + t)
                        if text:
                            # The template's squad replaces whatever was
                            # collected from the article so far.
                            players = playerparser.fetchPlayers(text)
                            if len(players) > 15:
                                finishedReadingPlayers = True
            if playerparser.endOfPlayerList(line):
                finishedReadingPlayers = True

        if lineWithoutSpaces.startswith("|position="):
            # this seems to usually be either this or last season's position
            lowered = lineWithoutSpaces.lower()
            if 'promoted' not in lowered and 'relegated' not in lowered:
                tp = wikiutils.getNumberKeyValue(line)
                if tp:
                    teamposition = tp

        # One pass per line is enough: every "key = value" column on the
        # line is examined below, no matter how many times the regex hit.
        if kitinfo_re.findall(line):
            columns = [x.strip() for x in line.split('|')
                       if 'body' in x or 'shorts' in x
                       or 'socks' in x or 'pattern_b' in x]
            # apparently, n may be more than 1 if more than one kit part
            # is on a line
            for c in columns:
                try:
                    k, v = wikiutils.getKeyValue(c)
                except Exception:
                    # Not a usable key/value column; skip it.
                    continue
                for prefix, attr in kitColorAttrs:
                    if k.startswith(prefix):
                        n = kitIndex(k[len(prefix):])
                        if n is not None:
                            setattr(kit[n], attr, getColorValue(v))
                        break
                # TODO: body type, second color ("pattern_b<N>" keys pass
                # the column filter above but are not used yet)

    if len(players) < 15:
        return None
    return soccer.Team(team, kit, teamposition or 0, players)
def getSeasonTeams(rvtext, leaguedata):
    """Extract the club list(s) of a league season from its wikitext.

    Every wiki table with a "Team" or "Club" header column is read and
    remembered together with the most recent heading above it. Tables that
    contain duplicate names are discarded. The result is then chosen as
    follows:

    * exactly one table has leaguedata.numteams entries: use it;
    * several tables have that length: prefer the one whose heading
      contains leaguedata.title, otherwise take the first;
    * only shorter tables exist, and their sizes add up to
      leaguedata.numteams (and their count matches leaguedata.divisions,
      if that is non-zero): each table becomes one group.

    Returns a list of groups, or an empty list when nothing fits. The
    single-table results are (heading, teams) tuples; the multi-group
    result is whatever addOrUpdateTeamList stored in the list.
    """
    # Parser states for the table scanner.
    OUTSIDE_TABLE = 0   # not inside a table
    READING_HEADER = 1  # table opened, looking for the Team/Club header
    AWAITING_ROW = 2    # waiting for the next "|-" row separator
    INSIDE_ROW = 3      # reading the cells of a row

    fullLists = []      # tables with exactly numteams entries
    partialLists = []   # non-empty tables with fewer entries
    state = OUTSIDE_TABLE
    teamColumn = -1
    cellIndex = -1
    currentHeading = None

    # First collect all team lists.
    for rawLine in rvtext.split('\n'):
        ls = rawLine.strip()

        found = wikiutils.getHeading(ls)
        if found:
            currentHeading = found

        if table_re.match(ls):
            state = READING_HEADER
            teamColumn = -1
            tableTeams = []
        elif state == READING_HEADER:
            if ls[:1] == '!':
                teamColumn += 1
                if 'Team' in ls or 'Club' in ls:
                    state = AWAITING_ROW
        elif state == AWAITING_ROW:
            if ls[:2] == '|-':
                state = INSIDE_ROW
                cellIndex = -1
        elif state == INSIDE_ROW:
            if ls[:2] == '|-':
                state = AWAITING_ROW
                cellIndex = -1
            elif len(ls) >= 2 and ls[0] == '|' and ls[1] != '}':
                if '||' not in ls:
                    # Columns divided by line. It looks like this
                    # (e.g. Premier League):
                    #
                    #   {| class="wikitable sortable"
                    #   ! Team
                    #   ! Location
                    #   ! Stadium
                    #   |-
                    #   | [[Arsenal]]
                    #   | [[London]]
                    #   | [[Emirates Stadium]]
                    #   |-
                    #   ...
                    #   |}
                    cellIndex += 1
                    if cellIndex == teamColumn:
                        tableTeams.append(ls.strip('|'))
                        state = AWAITING_ROW
                        cellIndex = -1
                else:
                    # Columns on one line. It looks like this
                    # (e.g. Football_League_Championship):
                    #
                    #   {| class="wikitable sortable"
                    #   ! Team
                    #   ! Location
                    #   ! Stadium
                    #   |-
                    #   | {{ fb team Barnsley }} || [[Barnsley]] || [[Oakwell]]
                    #   |-
                    #   ...
                    #   |}
                    cells = [cell.strip() for cell in ls[1:].split('||')]
                    if len(cells) > teamColumn:
                        tableTeams.append(cells[teamColumn])
                        # NOTE(review): nesting reconstructed from a
                        # whitespace-collapsed source - confirm this state
                        # change belongs inside the length check.
                        state = AWAITING_ROW

        if state in (AWAITING_ROW, INSIDE_ROW) and ls[:2] == '|}':
            # make sure there are no duplicates in the list - may happen
            # e.g. with historical winners tables (Regionalliga_Süd)
            teamCount = len(tableTeams)
            if len(set(tableTeams)) == teamCount:
                if teamCount == leaguedata.numteams:
                    addOrUpdateTeamList(fullLists, currentHeading,
                                        tableTeams)
                elif tableTeams and teamCount < leaguedata.numteams:
                    # Only add list if the same team list not already added.
                    addOrUpdateTeamList(partialLists, currentHeading,
                                        tableTeams)
            # NOTE(review): the reset below is assumed to run for every
            # closed table, including ones rejected for duplicates - confirm.
            state = OUTSIDE_TABLE
            tableTeams = []
            currentHeading = None

    groups = []
    if fullLists and leaguedata.divisions <= 1:
        if len(fullLists) > 1 and leaguedata.title:
            for heading, teams in fullLists:
                if heading and leaguedata.title in heading:
                    groups = [(heading, teams)]
                    break
        if not groups:
            # Single candidate, or no heading matched: default to the
            # first one (correct in e.g. 2012_Ykkönen)
            groups = [('', fullLists[0][1])]
    elif partialLists and (leaguedata.divisions == 0 or
                           len(partialLists) == leaguedata.divisions):
        totalNumTeams = sum(len(entry[1]) for entry in partialLists)
        if totalNumTeams == leaguedata.numteams:
            groups = partialLists
    return groups