def fetchPlayer(line):
    """Parse one wikitext line of a squad list into a ``soccer.Player``.

    Only lines containing a ``{{fs player``, ``{{football squad player`` or
    ``{{fs2 player`` template (matched case-insensitively) are examined.
    The template's ``key=value`` columns supply the shirt number (``no``),
    nationality (``nat``), position (``pos``) and the name (``name``, or
    ``first`` plus ``last`` when no full name is given).

    Returns the player, or None when the line holds no such template or
    when the position or the name could not be determined.  A missing or
    non-numeric shirt number becomes 0 and a missing nationality 'NA'.
    """
    def playerError(msg):
        print >> Globals.errlog, "Player %s: %s" % (line.encode('utf-8'), msg.encode('utf-8'))

    lowered = line.lower()
    markers = ('{{fs player', '{{football squad player', '{{fs2 player')
    if not any(marker in lowered for marker in markers):
        return None

    # Drop wiki links and the template braces, then split into columns.
    bare = wikiutils.unlink_wiki(line).replace('{', '').replace('}', '')

    number = nationality = pos = None
    name = firstname = lastname = None
    for column in bare.split('|'):
        column = column.strip()
        if '=' not in column:
            continue
        try:
            key, value = wikiutils.getKeyValue(column)
        except ValueError:
            playerError("Couldn't parse player information column: %s" % column)
            continue

        if key == 'no':
            try:
                number = int(value)
            except (UnicodeEncodeError, ValueError):
                # usually a dash in place of the player number
                pass
        elif key == 'nat':
            nationality = value
        elif key == 'pos':
            pos = value
        elif key == 'name':
            name = wikiutils.unlinkify(value)[0]
        elif key == 'first':
            firstname = wikiutils.unlinkify(value)[0]
        elif key == 'last':
            lastname = wikiutils.unlinkify(value)[0]

    # Fall back to "first last" when no full name column was present.
    if not name and firstname and lastname:
        name = firstname + ' ' + lastname

    if not (pos and name):
        return None

    return soccer.Player(name, number or 0, pos, nationality or 'NA')
def _kitSlot(key, prefix):
    """Return the 0-based kit slot encoded right after *prefix* in *key*.

    ``body1`` gives 0 and ``shorts2`` gives 1.  Returns None when nothing
    follows the prefix, when the next character is not a number, or when
    the slot is not one of the two kits that are tracked.
    """
    suffix = key[len(prefix):]
    if not suffix:
        return None
    try:
        slot = int(suffix[0]) - 1
    except ValueError:
        # Not a numbered kit key; ignore it instead of aborting the parse.
        return None
    if slot in (0, 1):
        return slot
    return None


def parseTeam(team, rvtext, mayGetTemplates):
    """Build a ``soccer.Team`` from the wikitext of a club article.

    The text is scanned line by line for three kinds of information:

    * players, via ``playerparser.fetchPlayer``, until
      ``playerparser.endOfPlayerList`` reports the end of the squad list.
      When *mayGetTemplates* is true and a "current squad" / "first ...
      squad" heading is followed by a template, that template's page is
      fetched and its players replace the ones collected so far;
    * the league position from a ``|position=`` line, unless the line
      mentions a promotion or relegation;
    * body, shorts and socks colours of the first two kits.

    Parameters:
        team: team name, passed through to ``soccer.Team``.
        rvtext: wikitext of the article.
        mayGetTemplates: whether squad templates may be fetched.

    Returns the team, or None when fewer than 15 players were found.  An
    unknown league position is reported as 0.
    """
    players = []
    teamposition = None
    kit = [soccer.Kit(), soccer.Kit()]
    finishedReadingPlayers = False
    lookForSquadTemplate = False
    # (key prefix, Kit attribute) for the kit parts that are stored.
    kitParts = (
        ('body', 'bodycolor'),
        ('shorts', 'shortscolor'),
        ('socks', 'sockscolor'),
    )

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())
        if not finishedReadingPlayers:
            p = playerparser.fetchPlayer(line)
            if p:
                players.append(p)
            else:
                heading = wikiutils.getHeading(line)
                if heading:
                    headingLower = heading.lower()
                    isSquadHeading = ('current squad' in headingLower or
                                      ('first' in headingLower and 'squad' in headingLower))
                    # Both heading variants are gated by mayGetTemplates;
                    # without the explicit grouping "and" binds tighter
                    # than "or" and the second variant escaped the gate.
                    lookForSquadTemplate = bool(mayGetTemplates and isSquadHeading)
                elif lookForSquadTemplate:
                    t = wikiutils.getTemplate(line)
                    if t:
                        text = wikiutils.getPage('Template:' + t)
                        if text:
                            players = playerparser.fetchPlayers(text)
                            if len(players) > 15:
                                finishedReadingPlayers = True

        if playerparser.endOfPlayerList(line):
            finishedReadingPlayers = True

        if lineWithoutSpaces.startswith("|position="):
            # this seems to usually be either this or last season's position
            positionLower = lineWithoutSpaces.lower()
            if 'promoted' not in positionLower and 'relegated' not in positionLower:
                tp = wikiutils.getNumberKeyValue(line)
                if tp:
                    teamposition = tp

        # The columns come from the whole line, so a single pass covers
        # every kit part on it no matter how many times the pattern matched.
        if kitinfo_re.findall(line):
            columns = [x.strip() for x in line.split('|')
                       if 'body' in x or 'shorts' in x or 'socks' in x or 'pattern_b' in x]
            for c in columns:
                try:
                    k, v = wikiutils.getKeyValue(c)
                except Exception:
                    # Best effort: skip columns that are not key=value.
                    continue

                for prefix, attr in kitParts:
                    if k.startswith(prefix):
                        n = _kitSlot(k, prefix)
                        if n is not None:
                            setattr(kit[n], attr, getColorValue(v))
                        break
                # TODO: body type, second color (the pattern_b* keys)

    if len(players) < 15:
        return None

    return soccer.Team(team, kit, teamposition or 0, players)
# Example #3
# 0
def getLeagueData(rvtext, leaguedata):
    """Fill the still-empty fields of *leaguedata* from league wikitext.

    Two sources are read from *rvtext*:

    * infobox template parameters: ``|current=``, ``|division(s)=``,
      ``|level(s)=``, ``|relegation=`` and ``|teams=``;
    * a hand-written ``{| class="infobox football"`` table, where a row
      containing ``background`` is treated as a heading that selects what
      the following rows mean.  Scanning stops at the table's ``|}``.

    Parameters:
        rvtext: wikitext of the league article.
        leaguedata: object with ``season``, ``relegationleagues``,
            ``numteams``, ``divisions`` and ``levelnum`` attributes.  Only
            attributes that are currently falsy are overwritten.

    Returns None; the result is stored on *leaguedata*.
    """
    season = ''
    relegationleagues = {}
    numteams = 0
    levelnum = 0
    divisions = 0

    class InfoboxState:
        Outside = 0
        Entered = 1
        RelegationLeagues = 2
        NumTeams = 3
        NumLevel = 4
        Season = 5

    # Heading keyword -> state, checked in this order; first match wins.
    headingStates = (
        ('relegation', InfoboxState.RelegationLeagues),
        ('number of clubs', InfoboxState.NumTeams),
        ('level', InfoboxState.NumLevel),
        ('current season', InfoboxState.Season),
    )

    ibs = InfoboxState.Outside

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())
        if not season and lineWithoutSpaces.startswith("|current="):
            _, v = wikiutils.getKeyValue(line)
            _, competitionlink = wikiutils.unlinkify(v)
            if competitionlink:
                season = competitionlink

        if not divisions and lineWithoutSpaces.startswith(("|divisions=", "|division=")):
            tp = wikiutils.getNumberKeyValue(line)
            if tp:
                divisions = tp

        if not levelnum and lineWithoutSpaces.startswith(("|levels=", "|level=")):
            tp = wikiutils.getNumberKeyValue(line)
            if tp:
                levelnum = tp

        if not relegationleagues and lineWithoutSpaces.startswith("|relegation="):
            _, v = wikiutils.getKeyValue(line)
            candidates = [wikiutils.unlinkify(x.strip()) for x in br_re.split(v)]
            for _, cl in candidates:
                if cl:
                    relegationleagues[cl] = cl

        if not numteams and lineWithoutSpaces.startswith('|teams='):
            # Guarded like divisions/levelnum above so that a falsy result
            # does not replace the numeric default.
            tp = wikiutils.getNumberKeyValue(line)
            if tp:
                numteams = tp

        if ibs == InfoboxState.Outside and lineWithoutSpaces.startswith('{|class="infoboxfootball"'):
            # e.g. Regionalliga_Nord
            ibs = InfoboxState.Entered
        elif ibs != InfoboxState.Outside and lineWithoutSpaces.startswith('|'):
            # Keep what follows the second '|' of the row.
            text = '|'.join(line.split('|')[2:])
            if not text and lineWithoutSpaces[0:2] == '|}':
                # End of the infobox table: nothing further is read.
                ibs = InfoboxState.Outside
                break
            elif text:
                t, link = wikiutils.unlinkify(text)
                tl = t.lower()
                if 'background' in line:
                    # Heading row: decide how to read the rows below it.
                    ibs = InfoboxState.Entered
                    for keyword, state in headingStates:
                        if keyword in tl:
                            ibs = state
                            break
                elif ibs == InfoboxState.RelegationLeagues:
                    if link:
                        relegationleagues[link] = link
                    else:
                        ibs = InfoboxState.Entered
                elif ibs == InfoboxState.NumTeams:
                    found = re.search(r'\d+', t)
                    if found:
                        numteams = int(found.group())
                elif ibs == InfoboxState.NumLevel:
                    found = re.search(r'\d+', t)
                    if found:
                        levelnum = int(found.group())
                elif ibs == InfoboxState.Season:
                    if link:
                        season = link
                    else:
                        ibs = InfoboxState.Entered

    # Never overwrite values the caller already has.
    if not leaguedata.season:
        leaguedata.season = season
    if not leaguedata.relegationleagues:
        leaguedata.relegationleagues = relegationleagues
    if not leaguedata.numteams:
        leaguedata.numteams = numteams
    if not leaguedata.divisions:
        leaguedata.divisions = divisions
    if not leaguedata.levelnum:
        leaguedata.levelnum = levelnum