def fetchPlayer(line):
    """Parse one wikitext squad-template line into a ``soccer.Player``.

    The line is only considered when it contains (case-insensitively) one of
    the ``{{fs player``, ``{{football squad player`` or ``{{fs2 player``
    templates.  The ``no``, ``nat``, ``pos``, ``name``, ``first`` and ``last``
    parameters are read from its ``|``-separated ``key=value`` columns.

    Returns ``soccer.Player(name, number, pos, nationality)`` when both a
    position and a name (given directly or built from first + last name) were
    found, otherwise ``None``.  A missing or non-numeric shirt number becomes
    ``0`` and a missing nationality becomes ``'NA'``.
    """
    def playerError(msg):
        # Report a parsing problem together with the offending source line.
        print >> Globals.errlog, "Player %s: %s" % (line.encode('utf-8'), msg.encode('utf-8'))

    lowered = line.lower()
    templateMarkers = ('{{fs player', '{{football squad player', '{{fs2 player')
    if not any(marker in lowered for marker in templateMarkers):
        return None

    unlinkedline = wikiutils.unlink_wiki(line)
    # Drop the template braces, then split the template into its parameters.
    columns = [s.strip() for s in unlinkedline.replace('{', '').replace('}', '').split('|')]

    number = None
    nationality = None
    pos = None
    name = None
    firstname = None
    lastname = None
    for column in columns:
        if '=' not in column:
            continue
        try:
            k, v = wikiutils.getKeyValue(column)
        except ValueError:
            playerError("Couldn't parse player information column: %s" % column)
            continue
        if k == 'no':
            try:
                number = int(v)
            except ValueError:
                # Usually a dash as a player number.  UnicodeEncodeError is a
                # ValueError subclass, so non-ASCII dashes are covered too.
                pass
        elif k == 'nat':
            nationality = v
        elif k == 'pos':
            pos = v
        elif k == 'name':
            name = wikiutils.unlinkify(v)[0]
        elif k == 'first':
            firstname = wikiutils.unlinkify(v)[0]
        elif k == 'last':
            lastname = wikiutils.unlinkify(v)[0]

    if not name and firstname and lastname:
        name = firstname + ' ' + lastname

    # Position and name are mandatory; number and nationality have defaults.
    if not (pos and name):
        return None
    return soccer.Player(name, number or 0, pos, nationality or 'NA')
# Example #2
# 0
def addOrUpdateTeamList(l, heading, teams):
    """Add a team list to ``l`` unless an equivalent, better-linked one exists.

    ``l`` holds ``(heading, teamlist)`` entries; every ``teamlist`` item is a
    pair as returned by ``wikiutils.unlinkify`` (name first, link second).
    Two lists count as the same when their team names match, ignoring order
    and non-breaking spaces.  If such an entry is already present it is
    replaced only when the new list contains more linked teams; otherwise
    ``l`` is left as it is.  ``l`` is modified in place; nothing is returned.
    """
    def cleaned(namelist):
        # Treat a non-breaking space like a plain one so names differing only
        # in that character compare equal.  Normalise before sorting so both
        # sides are ordered by the same text.
        return sorted(t.replace(u'\xa0', u' ') for t in namelist)

    tlist = sorted([wikiutils.unlinkify(t) for t in teams])
    toinsert = cleaned([t[0] for t in tlist])

    # Look for an existing entry that lists exactly the same team names.
    prev = None
    for entry in l:
        if cleaned([x[0] for x in entry[1]]) == toinsert:
            prev = entry
            break

    if prev is None:
        l.append((heading, tlist))
        return

    numLinksInPrev = sum(1 for x in prev[1] if x[1])
    numLinksInThis = sum(1 for x in tlist if x[1])
    if numLinksInThis > numLinksInPrev:
        # Keep whichever variant carries more links.
        l.remove(prev)
        l.append((heading, tlist))
# Example #3
# 0
def getTopLeagues():
    """Collect the leagues linked from the confederation league templates.

    For each confederation template the page text is fetched with
    ``wikiutils.getPage`` and scanned line by line.  Bullet (``*``) lines that
    follow a ``|listN=`` parameter are treated as league entries; a list ends
    at the next line starting with ``|`` or ``}`` that is not itself a
    ``|listN=`` header.

    Returns a dict mapping each league link to the tuple
    ``(name, name, link, confederationname)``, where the confederation name is
    the part of the template name before the first underscore.  Entries
    without a link are skipped.
    """
    templates = ['UEFA_leagues', 'CONMEBOL_leagues', 'CONCACAF_leagues',
            'CAF_leagues', 'AFC_leagues', 'OFC_leagues']
    # Matched against the line with all whitespace removed.
    listHeader = re.compile(r'\|list[0-9]+=')
    leagues = dict()
    for t in templates:
        confederationname = t.split('_')[0]
        text = wikiutils.getPage('Template:' + t)
        if not text:
            continue
        print('done.')
        inList = False
        for line in text.split('\n'):
            lineWithoutSpaces = ''.join(line.split())
            if not lineWithoutSpaces:
                continue
            if listHeader.match(lineWithoutSpaces):
                # Checked first so that back-to-back list parameters keep
                # the parser inside a list.
                inList = True
            elif lineWithoutSpaces[0] in '|}':
                # Any other parameter or the end of the template closes the list.
                inList = False
            elif inList and lineWithoutSpaces[0] == '*':
                # Strip surrounding whitespace first so an indented bullet
                # still loses its asterisk(s).
                v = line.strip().strip('*').strip()
                name, link = wikiutils.unlinkify(v)
                if link:
                    leagues[link] = (name, name, link, confederationname)
                    print('Found %s' % name)
    return leagues
# Example #4
# 0
def getLeagueData(rvtext, leaguedata):
    """Scan league wikitext and fill in the still-unset fields of *leaguedata*.

    Two sources are read while walking ``rvtext`` line by line:

    * template parameters ``|current=``, ``|division(s)=``, ``|level(s)=``,
      ``|relegation=`` and ``|teams=`` (matched with whitespace removed), and
    * a wiki table opened with ``{| class="infobox football"``, where a row
      containing ``background`` acts as a header and the rows after it
      supply the values.  Scanning stops at that table's closing ``|}``.

    Afterwards ``season``, ``relegationleagues``, ``numteams``, ``divisions``
    and ``levelnum`` on *leaguedata* are each overwritten only if currently
    falsy.  Nothing is returned.
    """
    class InfoboxState:
        Outside = 0
        Entered = 1
        RelegationLeagues = 2
        NumTeams = 3
        NumLevel = 4
        Season = 5

    # Header keyword -> state; tried in this order, first hit wins.
    headerStates = (
        ('relegation', InfoboxState.RelegationLeagues),
        ('number of clubs', InfoboxState.NumTeams),
        ('level', InfoboxState.NumLevel),
        ('current season', InfoboxState.Season),
    )

    foundSeason = ''
    foundRelegation = dict()
    foundTeams = 0
    foundLevel = 0
    foundDivisions = 0
    tableState = InfoboxState.Outside

    for rawLine in rvtext.split('\n'):
        compact = ''.join(rawLine.split())

        # --- template-parameter form -------------------------------------
        if not foundSeason and compact.startswith("|current="):
            key, value = wikiutils.getKeyValue(rawLine)
            label, target = wikiutils.unlinkify(value)
            if target:
                foundSeason = target

        if not foundDivisions and compact.startswith(("|divisions=", "|division=")):
            parsed = wikiutils.getNumberKeyValue(rawLine)
            if parsed:
                foundDivisions = parsed

        if not foundLevel and compact.startswith(("|levels=", "|level=")):
            parsed = wikiutils.getNumberKeyValue(rawLine)
            if parsed:
                foundLevel = parsed

        if not foundRelegation and compact.startswith("|relegation="):
            key, value = wikiutils.getKeyValue(rawLine)
            for part in br_re.split(value):
                label, target = wikiutils.unlinkify(part.strip())
                if target:
                    foundRelegation[target] = target

        if not foundTeams and compact.startswith('|teams='):
            foundTeams = wikiutils.getNumberKeyValue(rawLine)

        # --- wiki-table form ---------------------------------------------
        if tableState == InfoboxState.Outside:
            if compact.startswith('{|class="infoboxfootball"'):
                # e.g. Regionalliga_Nord
                tableState = InfoboxState.Entered
            continue

        if not compact or compact[0] != '|':
            continue

        # Everything after the second pipe is the cell content.
        cellText = '|'.join(rawLine.split('|')[2:])
        if not cellText:
            if compact[0:2] == '|}':
                # End of the table: stop reading the page altogether.
                tableState = InfoboxState.Outside
                break
            continue

        plain, target = wikiutils.unlinkify(cellText)
        lowered = plain.lower()
        if 'background' in rawLine:
            # Header row: decide what the following value rows mean.
            for keyword, state in headerStates:
                if keyword in lowered:
                    tableState = state
                    break
            else:
                tableState = InfoboxState.Entered
        elif tableState == InfoboxState.RelegationLeagues:
            if target:
                foundRelegation[target] = target
            else:
                tableState = InfoboxState.Entered
        elif tableState == InfoboxState.NumTeams:
            digits = re.findall(r'\d+', plain)
            if digits:
                foundTeams = int(digits[0])
        elif tableState == InfoboxState.NumLevel:
            digits = re.findall(r'\d+', plain)
            if digits:
                foundLevel = int(digits[0])
        elif tableState == InfoboxState.Season:
            if target:
                foundSeason = target
            else:
                tableState = InfoboxState.Entered

    # Only fill in what the caller has not already provided.
    results = (
        ('season', foundSeason),
        ('relegationleagues', foundRelegation),
        ('numteams', foundTeams),
        ('divisions', foundDivisions),
        ('levelnum', foundLevel),
    )
    for attr, value in results:
        if not getattr(leaguedata, attr):
            setattr(leaguedata, attr, value)