Example #1
def get_spreads_for_months_and_write_to_csv():
    # This function uses OddsShark to scrape historical game lines (spreads).

    all_line_data = lines.scrapeLineData()
    parsed_line_data = lines.parseLineData(all_line_data)
    final_data = lines.finalizeLineData(parsed_line_data)
    folder = jsonData.LOCAL_DATA_PATH
    filename = folder + 'game-line-data.csv'
    csvOps.writeToCsv(final_data, filename)
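Every example on this page ultimately hands its rows to a writeToCsv helper from the project's csvOps (or csv) module, which is not shown here. A minimal sketch of what that helper plausibly looks like, inferred only from the call sites (a list of row-lists, a destination filename, and the optional header keyword used in the later examples):

import csv as stdlib_csv  # stdlib; aliased because later snippets import the project module as `csv`

def writeToCsv(rows, filename, header=None):
    # Write each row-list to `filename`, optionally preceded by a header row.
    with open(filename, 'w', newline='') as f:
        writer = stdlib_csv.writer(f)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)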
Example #2
def getMissingData():

    filename = "missing_player_data.csv"
    fileLoc = jsonData.LOCAL_DATA_PATH + filename
    rows = csvOps.openCsvFromFileToArrays(fileLoc)
    gamesForSeason = api.getGamesInRange('2015-11-01', '2016-04-13')
    teamDict = jsonData.TEAM_ABBREV_TO_ID

    session = requests.Session()

    matchedData = []
    missingData = []

    for player in rows:
        playerId = player[0]
        brefId = player[1]
        missingDates = player[2].split(",")

        baseUrl = "http://www.basketball-reference.com/players/"
        firstLetter = brefId[0] + "/"
        endUrl = "/gamelog/2016/"

        fullUrl = baseUrl + firstLetter + brefId + endUrl
        
        rawHtml = session.get(fullUrl).content
        tree = html.fromstring(rawHtml)
        games = tree.cssselect('table#pgl_basic tbody tr:not(.thead)')

        # check for matching games, removing dates if they match:
        for game in games:
            gameDate = game[2].text_content().strip()  # third cell holds the game date
            teamId = teamDict[game[4].text_content().strip()]  # fifth cell holds the team abbreviation
            
            # check if matching game date found for missing date
            if gameDate in missingDates:
                # the box score link in the date cell carries the bref game slug
                gameBref = game[2].cssselect('a')[0].get('href').split("/")[-1].replace(".html", "")
                # use a distinct name so the row element `game` is not shadowed
                matchedGame = next((g for g in gamesForSeason if g["bref_slug"] == gameBref), None)
                missingDates.remove(gameDate)

                if matchedGame is None:
                    print("NO GAME MATCH", gameDate, str(missingDates), brefId)
                    continue

                dataRow = [playerId, gameDate, teamId, matchedGame["game_id"]]
                matchedData.append(dataRow)
        
        # for any leftover missing dates:
        for gameDate in missingDates:
            missingData.append([playerId, brefId, gameDate])

    csvOps.writeToCsv(matchedData, jsonData.LOCAL_DATA_PATH + "matched-data.csv")
    # also persist any dates that never matched a game (filename is illustrative)
    csvOps.writeToCsv(missingData, jsonData.LOCAL_DATA_PATH + "still-missing-data.csv")
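The scraping above combines requests, lxml's html.fromstring, and cssselect. The same pattern in a self-contained form, run against an inline HTML fragment rather than a live basketball-reference gamelog page (requires the lxml and cssselect packages):

from lxml import html

raw = """
<table id="pgl_basic"><tbody>
  <tr class="thead"><th>Rk</th></tr>
  <tr><td>1</td><td>box</td><td>2016-01-15</td><td>28</td><td>BOS</td></tr>
</tbody></table>
"""
tree = html.fromstring(raw)
for row in tree.cssselect('table#pgl_basic tbody tr:not(.thead)'):
    print(row[2].text_content().strip())  # third cell, the game date: 2016-01-15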
Example #3
def get_usual_depth_positions(season):
    session = requests.Session()
    current_players = api.getCurrentPlayerData()
    # teams_url = 'http://www.basketball-reference.com/leagues/NBA_' + str(season) + '.html'
    depth_url = 'http://basketball.realgm.com/nba/depth-charts/' + str(season)

    page = session.get(depth_url)
    tree = html.fromstring(page.text)
    # team_rows = tree.cssselect('table#confs_standings_E tbody tr th a, table#confs_standings_W tbody tr th a')
    team_section = tree.cssselect('table.basketball tbody')

    all_depth_pos = {}

    for section in team_section:
        rows = section.cssselect('tr')
        for depth, row in enumerate(rows):
            players = row.cssselect("td.depth-chart-cell a")
            for player in players:
                # e.g. href "/player/Kevin-Durant/Summary/..." -> "Kevin Durant"
                player_name = " ".join(
                    player.get("href").split("/")[2].split("-")).strip()
                player_obj = next((p for p in current_players
                                   if p["player_name"] == player_name),
                                  None)
                player_depth = depth + 1

                if player_obj is None:
                    player_id = int(
                        input("What is the player_id for " + player_name +
                              "? "))
                    if player_id == 0:  # 0 is the manual "skip this player" sentinel
                        continue
                else:
                    player_id = player_obj["player_id"]

                if player_id in all_depth_pos:
                    if all_depth_pos[player_id] < player_depth:
                        all_depth_pos[player_id] = player_depth
                else:
                    all_depth_pos[player_id] = player_depth

    depth_rows = []
    filename = './../local-data/usual_depth_pos_' + str(season) + '.csv'
    for player, depth in all_depth_pos.items():
        depth_rows.append([player, depth])

    csvOps.writeToCsv(depth_rows, filename)
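A side note on the depth bookkeeping above: the if/else that keeps the largest (deepest) slot per player is equivalent to a single max() over dict.get:

all_depth_pos[player_id] = max(all_depth_pos.get(player_id, 0), player_depth)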
Example #4
def pullGoogleTrainingDataAndWriteToCsv(dateArr, statType):
    '''
    Pulls data for each date in dateArr for a specific stat type.
    '''
    folder = jsonData.LOCAL_DATA_PATH
    # filename = 'nba-' + statType + '-google-initial-training-data.csv'
    filename = config["GOOGLE_CLOUD"]["TRAINING_DATA_FILES"][statType]
    location = folder + filename

    print("Pulling factor data from API...")
    data = getDataForMultipleDates(dateArr, statType, True,
                                   10)  # training=True, recentGames=10

    print("Writing factor data to csv. File location: ", location)
    try:
        csv.writeToCsv(data, location)  # no header row!
        return location
    except Exception:
        return False
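The output filename comes from a config lookup rather than the string concatenation used elsewhere on this page. The config file itself is not shown; a hypothetical shape consistent with the lookup (and with the commented-out naming scheme above) would be:

config = {
    "GOOGLE_CLOUD": {
        "TRAINING_DATA_FILES": {
            "points": "nba-points-google-initial-training-data.csv",     # keys are assumed stat types
            "rebounds": "nba-rebounds-google-initial-training-data.csv",
        }
    }
}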
Example #5
def pullAzureTrainingDataAndWriteToCsv(dateArr, statType):
    '''
    Pulls data for each date in dateArr for a specific stat type.
    '''
    folder = jsonData.LOCAL_DATA_PATH
    filename = 'nba-' + statType + '-azure-initial-training-data.csv'
    location = folder + filename

    print("Pulling factor data from API...")
    data = getDataForMultipleDates(dateArr, statType, True,
                                   10)  # training=True, recentGames=10

    print("Writing factor data to csv. File location: ", location)
    try:
        csv.writeToCsv(data, location, header=getColumns())  # header row!
        print("CSV WRITE SUCCESS", location)
        return location
    except Exception:
        print("COULDN'T WRITE CSV")
        return False
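A hypothetical invocation (the dates and the 'points' stat type are illustrative, not confirmed by this page):

dates = ['2016-11-01', '2016-11-02', '2016-11-03']
outfile = pullAzureTrainingDataAndWriteToCsv(dates, 'points')
if outfile:
    print('Azure training data written to', outfile)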
Example #6
def pullRetrainDataAndWriteToCsv(retrainDate, statType):
    '''
    Pulls data for a specific date to retrain the model.
    '''
    folder = './../local-data/retrain-data/'
    filename = retrainDate + '-nba-' + statType + '-retrain-data.csv'
    location = folder + filename

    print("Pulling retraining data from API...")
    # training=True, recentGames=10
    data = ml.getAndPrepFinalData(retrainDate, statType, True, 10)

    print("Data pulled, num rows ", len(data))

    print("Writing retraining data to csv w/ header. File location: ",
          location)
    try:
        csv.writeToCsv(data, location, header=ml.getColumns())
        return location
    except Exception:
        return False
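Since the function returns the file location on success and False on failure, a caller can retrain across several dates and skip failed writes. A hypothetical driver (dates and stat type are illustrative):

for retrainDate in ['2017-01-05', '2017-01-06', '2017-01-07']:
    location = pullRetrainDataAndWriteToCsv(retrainDate, 'points')
    if not location:
        print('Skipping retrain for', retrainDate)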