Exemplo n.º 1
0
def crawl(sport, year, division, org, game, url, neutral=False):
    """Fetch one game's tables into CSV files, then build the combined files.

    The work is split into two stages. Each stage is guarded by an empty
    marker file inside the game directory, so a later call skips any stage
    that already finished:

    * ``.done``   -- the game page and its box-score / play-by-play
      sub-pages were fetched and dumped to ``<name>.csv`` files.
    * ``.parsed`` -- ``Box Score - All (Parsed).csv`` and
      ``Play by Play - All (Parsed).csv`` were written.

    An exception raised inside a stage is printed through ``print2`` and
    swallowed; the stage's marker is then not written, so the stage runs
    again on the next call.

    Args:
        sport, year, division: substituted positionally into the
            module-level ``data`` path template.
        org: sub-directory of the data directory that holds the game.
        game: game identifier; every '/' is replaced by '.' to form the
            game directory name.
        url: game URL, joined onto the module-level ``domain``.
        neutral: when true, an empty ``.neutral`` marker file is created in
            the game directory (if not already present).
    """
    global data
    # NOTE(review): this rebinds the module-level ``data`` template to its
    # formatted value. A second call with a different sport/year/division
    # then finds no placeholders left and silently reuses the first call's
    # directory. Kept as-is because code outside this function may read the
    # formatted global -- confirm and make this a local if nothing does.
    data = data.format(sport, year, division)

    gamename = game.replace('/', '.')
    gamedir = os.path.join(data, org, gamename)

    def readFlag(flag):
        """Return whether marker file *flag* exists, creating the game dir."""
        # makedirs rather than mkdir: the ``org`` directory may not exist
        # yet, and mkdir would raise outside of any try block below.
        if not os.path.exists(gamedir):
            os.makedirs(gamedir)

        return os.path.exists(os.path.join(gamedir, flag))

    def setFlag(flag):
        """Create (or truncate) the empty marker file *flag*."""
        with open(os.path.join(gamedir, flag), 'w'):
            pass

    if neutral and not readFlag(".neutral"):
        setFlag(".neutral")

    # Template for every CSV written below; the "{}" is the table name.
    filename = os.path.join(gamedir, "{}.csv")

    # ---- Stage 1: download the raw tables ---------------------------------
    if not readFlag(".done"):
        try:
            gamelink = urljoin(domain, url)
            log("{} {} {} {} {} {}".format(sport, year, division, org, game, dumpURL(gamelink)))

            gs = parseURL(gamelink)

            # Pause between requests (presumably to throttle load on the
            # remote site).
            sleep(2)

            gamescore = None
            gameinfo = None

            periods = []
            teams = []
            nextPeriod = 0
            for table in gs.select("div.header_menu a"):
                href = table["href"]
                isBoxScore = href.startswith("/game/box_score")
                isPlayByPlay = href.startswith("/game/play_by_play")
                # Only box-score and play-by-play links are followed; this
                # also skips placeholder "#" links.
                if not (isBoxScore or isPlayByPlay):
                    continue

                label = table.text.strip()
                tablelink = urljoin(domain, href)
                print2("{} \033[4m{}\033[0m".format(label, tablelink))

                ts = parseURL(tablelink)

                # The score and info tables are taken from the first
                # sub-page visited and written only once.
                if gamescore is None:
                    gamescore = parseTable(ts.select("table:nth-of-type(1)")[0])
                    dumpTable(
                        gamescore,
                        filename.format("Score")
                    )

                if gameinfo is None:
                    gameinfo = transposeTable(
                        parseTable(ts.select("table:nth-of-type(3)")[0]) +
                        parseTable(ts.select("table:nth-of-type(4)")[0])
                    )
                    dumpTable(
                        gameinfo,
                        filename.format("Info")
                    )

                # Score table layout used here: row 0 is the header whose
                # cells after the first are period labels; rows 1 and 2
                # start with the two team names.
                teams = [gamescore[1][0].text.strip(), gamescore[2][0].text.strip()]
                periods = [v.text.strip() for v in gamescore[0][1:]]

                if isBoxScore:
                    if label == "Box Score":
                        sfilename = filename.format("Box Score - {}")
                    else:
                        # Any other box-score link is named after the next
                        # not-yet-used period label, in page order.
                        sfilename = filename.format(periods[nextPeriod] + " - {}")
                        nextPeriod += 1

                    # One stats table per team.
                    dumpTable(
                        parseTable(ts.select("table:nth-of-type(5)")[0], header=1),
                        sfilename.format(teams[0])
                    )
                    dumpTable(
                        parseTable(ts.select("table:nth-of-type(6)")[0], header=1),
                        sfilename.format(teams[1])
                    )
                else:
                    sfilename = filename.format("Play by Play - {}")

                    # The last score column is skipped (presumably the
                    # total); period i is read from table 6 + 2*i.
                    for (i, period) in enumerate(periods[:-1]):
                        dumpTable(
                            parseTable(ts.select("table:nth-of-type({})".format(6 + 2 * i))[0], header=0),
                            sfilename.format(period)
                        )

                sleep(2)

            # Nothing was parsed at all: the page had no usable sub-pages.
            if gamescore is None and gameinfo is None:
                raise Exception("Not a game.")

            setFlag(".done")

            sleep(2)
        except Exception as e:
            print2(colored("Error: ", "red"), e)
        finally:
            print2()

    # ---- Stage 2: build the combined "(Parsed)" files ---------------------
    if not readFlag(".parsed"):
        try:
            gamelink = urljoin(domain, url)
            log("{} {} {} {} {} {}".format(sport, year, division, org, game, dumpURL(gamelink)))
            print2("Parsing...")

            gamescore = loadTable(filename.format("Score"))

            sfilename = filename.format("Box Score - {}")
            teams = [gamescore[1][0], gamescore[2][0]]
            with open(filename.format("Box Score - All (Parsed)"), "w") as af:
                for team in teams:
                    boxScore = parseBoxScore(
                        sfilename.format(team),
                        filename.format("Info"),
                        team,
                        "All"
                    )

                    # Keep row 0 (the header) only for the first team.
                    rawDumpTable(boxScore[(0 if team == teams[0] else 1):], af)

            sfilename = filename.format("Play by Play - {}")
            periods = gamescore[0][1:]
            with open(filename.format("Play by Play - All (Parsed)"), "w") as af:
                for period in periods[:-1]:
                    playByPlay = parsePlayByPlay(
                        sfilename.format(period),
                        period,
                        filename.format("Info")
                    )

                    # Keep row 0 (the header) only for the first period.
                    rawDumpTable(playByPlay[(0 if period == periods[0] else 1):], af)

            setFlag(".parsed")
        except Exception as e:
            print2(colored("Error: ", "red"), e)
        finally:
            print2()
Exemplo n.º 2
0
            if player != "Team":
                if r[-2] == "Leaves Game":
                    if player in initPlayers[i]:
                        initPlayers[i].remove(player)
                else:
                    initPlayers[i].add(player)

            lineup = "; ".join(sorted(initPlayers[i]))
            r.append(lineup)

            lineupTime = r[3]
            if lastLineup[i] == None:
                lastLineup[i] = lineup

            lineupTimes[i][lastLineup[i]] = lineupTimes[i].setdefault(lastLineup[i], 0) + computeSeconds(lastLineupTime[i]) - computeSeconds(lineupTime)

            r.append(computeTime(lineupTimes[i].setdefault(lineup, 0)))

            lastLineup[i], lastLineupTime[i] = lineup, lineupTime

    addLineup(table)

    return table

if __name__ == "__main__":
    # Script entry point: hand the first three command-line arguments to
    # parse() unchanged and write whatever it returns to standard output.
    cliArgs = (sys.argv[1], sys.argv[2], sys.argv[3])
    parsedTable = parse(*cliArgs)
    rawDumpTable(parsedTable, sys.stdout)
Exemplo n.º 3
0
    try:
        box = loadTable(join(path, 'Box Score - All (Parsed).csv'))
        row = [r for r in box[1:] if r[3] == org][-1]

        isHome = row[3] == box[-1][3]
        home = "Home" if isHome else "Away"
        opponent = box[1 if isHome else -1][3]

        info = loadTable(join(path, 'Info.csv'))
        location, attendance = "", ""
        for (c, v) in zip(info[0], info[1]):
            if c == "Location:":
                location = v
            elif c == "Attendance:":
                attendance = v

        row = [gender, year, div] + row[:2] + [org, opponent, home] + row[6:] + [location, attendance]

        output.append(row)
    except:
        print("Exception: {}".format(path), file=sys.stderr)

# Walk the directory tree rooted at the first command-line argument, five
# levels deep (gender / year / division / organisation / game), and call
# extract() once for every game-level entry.
# NOTE(review): ls() is defined outside this view; each item it yields is
# unpacked as a 2-tuple, presumably (entry name, entry path) -- confirm.
for (gender, genderDir) in ls(sys.argv[1]):
    for (year, yearDir) in ls(genderDir):
        for (div, divDir) in ls(yearDir):
            for (org, orgDir) in ls(divDir):
                for (game, gameDir) in ls(orgDir):
                    extract(gender, year, div, org, game, gameDir)

# Write the rows collected in the module-level ``output`` list to stdout.
rawDumpTable(output, sys.stdout)