Пример #1
0
def teamMain():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv.knn')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn')
        testData = buildTestingSets(DIR + team + '-test.csv.knn')
        testLabels = buildTestingLabels(DIR + team + '-test.csv.knn')
        total = total + len(testLabels)

        svm = cv2.SVM()
        svm.train(trainData, trainLabels, params=svm_params)
        svm.save('svm_data.dat')

        # Accuracy
        count = 0
        for i in range(len(testLabels)):
            ret = svm.predict(np.array([testData[i]]))
            if ret == testLabels[i][0]:
                count = count + 1
        countTotal = countTotal + count
        print 'INFO: Accuracy(', team, ')', count / float(len(testLabels))
    print 'INFO: Total Accuracy: ', countTotal / float(total)
Пример #2
0
def teamMain():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        trainData = buildTrainingSets(DIR + team + '-train.csv')
        trainLabels = buildTrainingLabels(DIR + team + '-train.csv')
        testData = buildTestingSets(DIR + team + '-test.csv')
        testLabels = buildTestingLabels(DIR + team + '-test.csv')
        total = total + len(testLabels)

        knn = cv2.KNearest()
        knn.train(trainData, trainLabels)

        # Accuracy
        count = 0
        for i in range(len(testLabels)):
            ret, results, neighbours, dist = knn.find_nearest(
                np.array([testData[i]]), 11)
            if results[0][0] == testLabels[i][0]:
                count = count + 1

        countTotal = countTotal + count
        print 'INFO: Accuracy(', teamNames[teamIds.index(
            team)], ')', count / float(len(testLabels))
    print 'INFO: Total Accuracy: ', countTotal / float(total)
Пример #3
0
def teamMain():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  countTotal = 0
  total = 0

  for team in teamIds:
    trainData = buildTrainingSets(DIR + team + '-train.csv.knn')
    trainLabels = buildTrainingLabels(DIR + team + '-train.csv.knn')
    testData = buildTestingSets(DIR + team + '-test.csv.knn')
    testLabels = buildTestingLabels(DIR + team + '-test.csv.knn')
    total = total + len(testLabels)

    svm = cv2.SVM()
    svm.train(trainData, trainLabels, params=svm_params)
    svm.save('svm_data.dat')
    
    # Accuracy
    count = 0
    for i in range(len(testLabels)):
      ret = svm.predict(np.array([testData[i]]))
      if ret == testLabels[i][0]:
        count = count + 1
    countTotal = countTotal + count
    print 'INFO: Accuracy(', team, ')', count/float(len(testLabels))
  print 'INFO: Total Accuracy: ', countTotal/float(total)
Пример #4
0
def teamMain():
    DIR = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        total = total + len(labels)

        classifier = NaiveBayesClassifier.train(train)
        res = classifier.batch_classify(test)

        # accuracy
        count = 0
        for i in range(len(res)):
            if labels[i] == res[i]:
                count = count + 1

        countTotal = countTotal + count
        print 'INFO: Accuracy(', teamNames[teamIds.index(
            team)], ')', count / float(len(res))
    print 'INFO: Total Accuracy: ', countTotal / float(total)
Пример #5
0
def teamMain():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  countTotal = 0
  total = 0

  for team in teamIds:
    train = buildTrainingSets(DIR + team + '-train.csv')
    test = buildTestingSets(DIR + team + '-test.csv')
    labels = buildTestingLabels(DIR + team + '-test.csv')
    total = total + len(labels)
    
    # train
    classifier = nltk.MaxentClassifier.train(train, 'IIS', trace=0, max_iter=1000)
    
    # test
    count = 0
    for i in range(len(labels)):
      pdist = classifier.prob_classify(test[i])
      if pdist.prob('L') >= pdist.prob('W'):
        flag = 'L'
      else:
        flag = 'W'
      
      #print 'DEBUG: ', flag, labels[i]
      if flag == labels[i]:
        count = count + 1
        
    print 'INFO: accuracy ', team, " ", float(count)/len(labels)
Пример #6
0
def teamMain():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
    countTotal = 0
    total = 0

    for team in teamIds:
        train = buildTrainingSets(DIR + team + '-train.csv')
        test = buildTestingSets(DIR + team + '-test.csv')
        labels = buildTestingLabels(DIR + team + '-test.csv')
        total = total + len(labels)

        # train
        classifier = nltk.MaxentClassifier.train(train,
                                                 'IIS',
                                                 trace=0,
                                                 max_iter=1000)

        # test
        count = 0
        for i in range(len(labels)):
            pdist = classifier.prob_classify(test[i])
            if pdist.prob('L') >= pdist.prob('W'):
                flag = 'L'
            else:
                flag = 'W'

            #print 'DEBUG: ', flag, labels[i]
            if flag == labels[i]:
                count = count + 1

        print 'INFO: accuracy ', team, " ", float(count) / len(labels)
Пример #7
0
def teamMain():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [x[1] for x in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  countTotal = 0
  total = 0

  for team in teamIds:
    trainData = buildTrainingSets(DIR + team + '-train.csv')
    trainLabels = buildTrainingLabels(DIR + team + '-train.csv')
    testData = buildTestingSets(DIR + team + '-test.csv')
    testLabels = buildTestingLabels(DIR + team + '-test.csv')
    total = total + len(testLabels)

    knn = cv2.KNearest()
    knn.train(trainData, trainLabels)

    # Accuracy
    count = 0
    for i in range(len(testLabels)):
      ret, results, neighbours, dist = knn.find_nearest(np.array([testData[i]]), 31)
      if results[0][0] == testLabels[i][0]:
        count = count + 1

    countTotal = countTotal + count
    print 'INFO: Accuracy(', teamNames[teamIds.index(team)], ')', count/float(len(testLabels))
  print 'INFO: Total Accuracy: ', countTotal/float(total)
Пример #8
0
def generateTestDataByTeams():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [row[1] for row in loadMatrixFromFile(DIR + 'teamidshortname.csv')]

  for teamId in teamIds:
    res = generateTestDataByTeam(teamId)
    outputFile = DIR + teamId + '-test.csv.knn'
    print 'INFO: ', outputFile
    saveMatrixToFile(outputFile, res)
Пример #9
0
def generateTestDataByTeam(teamId):
  """Build the playoff test matrix for one team.

  Every row of each ``<season>.playoff.csv`` file whose column 6 (the
  matchup string) contains the team's short name yields one output row:

    [HOME, height, weight, age, exp, leaguerank,
     vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN]

  HOME is 1 when the matchup string contains 'vs.', WIN is 1 when column 0
  is 'W'. The four own/opponent values are the first row of the
  corresponding ``<id>.<season>.player.csv.processed.norm`` file, and the
  league ranks come from the first row of ``<season>.l``; the season used
  for these lookups is column 3 of the game row.

  Compared with the previous version, each feature file is read from disk
  at most once per call instead of once per game row, and the team's short
  name is looked up once.

  @param teamId  team id, expected to be one of the ids in
                 teamidshortname.csv
  @return res list(list), one feature row per matching playoff game
  Raises whatever ``teamIds.index`` raises for an unknown ``teamId``
  (ValueError if ``loadTeamIds`` returns a list -- TODO confirm), now
  before any season file is read.
  """
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [row[1] for row in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')

  # Invariant for the whole call, so resolve it once.
  shortName = teamNames[teamIds.index(teamId)]

  # path -> first row of that file; avoids re-reading the same feature and
  # rank files for every game row.
  firstRows = {}

  def firstRow(path):
    if path not in firstRows:
      firstRows[path] = loadMatrixFromFile(path)[0]
    return firstRows[path]

  res = []
  for season in seasons:
    for row in loadMatrixFromFile(DIR + season + '.playoff.csv'):
      matchup = row[6]
      if shortName not in matchup:
        continue

      WIN = 1 if row[0] == 'W' else 0
      HOME = 1 if 'vs.' in matchup else 0

      # The game row's own season column selects the feature files; a
      # separate name keeps the outer loop variable intact.
      rowSeason = row[3]
      statsSuffix = '.' + rowSeason + '.player.csv.processed.norm'

      heightTotal, weightTotal, ageTotal, expTotal = firstRow(DIR + teamId + statsSuffix)

      leagueranks = firstRow(DIR + rowSeason + '.l')
      # First three characters of the matchup string name the ranked team.
      leaguerank = leagueranks[teamNames.index(matchup[0:3])]

      # Last three characters of the matchup string name the opponent.
      vsTeamId = teamIds[teamNames.index(matchup[-3:])]
      vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal = firstRow(DIR + vsTeamId + statsSuffix)
      vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

      res.append([
          HOME, heightTotal, weightTotal, ageTotal, expTotal, leaguerank,
          vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal, vsLeaguerank,
          WIN,
      ])
  return res
Пример #10
0
def generateTrainDataBySeason(season):
  """Build the training matrix for one season across all teams.

  For every team id, each row of ``<team>.csv.sorted`` whose column 2
  equals ``season`` yields one output row:

    [HOME, height, weight, age, exp, leaguerank,
     vsHeight, vsWeight, vsAge, vsExp, vsLeaguerank, WIN]

  HOME is 1 when column 5 (the matchup string) contains 'vs.', WIN is 1
  when column 0 is 'W'. The four own/opponent values are the first row of
  ``<id>.<season>.player.csv.processed.norm``; league ranks are taken from
  the first row of ``<season>.l``.

  Compared with the previous version, each team's '.norm' file is read
  from disk at most once per call instead of once (own team) or twice
  (own team plus opponent) per game row. Files are still only opened when
  a matching game row needs them.

  @param season  season string as it appears in column 2 of the game files
  @return res list(list), one feature row per game of that season
  """
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  teamNames = [row[1] for row in loadMatrixFromFile(DIR + 'teamidshortname.csv')]
  leagueranks = loadMatrixFromFile(DIR + season + '.l')[0]

  # team id -> first row of its '.norm' file for this season.
  playerStats = {}

  def statsFor(tid):
    if tid not in playerStats:
      playerStats[tid] = loadMatrixFromFile(
          DIR + tid + '.' + season + '.player.csv.processed.norm')[0]
    return playerStats[tid]

  res = []
  for team in teamIds:
    for row in loadMatrixFromFile(DIR + team + '.csv.sorted'):
      if row[2] != season:
        continue

      matchup = row[5]
      WIN = 1 if row[0] == 'W' else 0
      HOME = 1 if 'vs.' in matchup else 0

      heightTotal, weightTotal, ageTotal, expTotal = statsFor(team)

      # First three characters of the matchup string name the ranked team.
      leaguerank = leagueranks[teamNames.index(matchup[0:3])]

      # Last three characters of the matchup string name the opponent.
      vsTeamId = teamIds[teamNames.index(matchup[-3:])]
      vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal = statsFor(vsTeamId)
      vsLeaguerank = leagueranks[teamIds.index(vsTeamId)]

      res.append([
          HOME, heightTotal, weightTotal, ageTotal, expTotal, leaguerank,
          vsHeightTotal, vsWeightTotal, vsAgeTotal, vsExpTotal, vsLeaguerank,
          WIN,
      ])
  return res
Пример #11
0
def main():
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')
    teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
    seasonTypes = ['Regular Season', 'Playoffs']
    # print seasons
    # return
    for team in teamIds:
        for season in seasons:
            #for seasonType in seasonTypes:
            seasonType = 'Regular Season'
            n = NBAStatsTeamPlayerExtractor(team, season, seasonType)
            outputFile = DIR + team + '.' + season + '.player.csv'
            print 'INFO: Processing ', outputFile
            mat = n.getStats()
            if mat == False:
                saveMatrixToFile(outputFile, [])
            else:
                saveMatrixToFile(outputFile, mat)
def main():
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')
  teamIds = loadTeamIds(DIR + 'teamidshortname.csv')
  seasonTypes = ['Regular Season', 'Playoffs']
  # print seasons
  # return
  for team in teamIds:
    for season in seasons:
      #for seasonType in seasonTypes:
      seasonType = 'Regular Season'
      n = NBAStatsTeamPlayerExtractor(team, season, seasonType)
      outputFile = DIR + team + '.' + season + '.player.csv'
      print 'INFO: Processing ', outputFile
      mat = n.getStats()
      if mat == False:
        saveMatrixToFile(outputFile, [])
      else:
        saveMatrixToFile(outputFile, mat)
Пример #13
0
# -*- coding: utf-8 -*-
#
# Author: Archer
# Date: 05/Jun/2015
# File: InsertPlayer.py
# Desc: insert into NBA.Player table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadSeasons, loadTeamIds, loadMatrixFromFile

# Directory that the season list, the team-id list and the per-team
# '<team>.<season>.player.csv' files are resolved against.
basePath = '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/'

# Both lists are loaded once, when the module is imported.
seasons = loadSeasons(basePath + 'seasons-18-Nov-2014.txt')
teamIds = loadTeamIds(basePath + 'teamidname-18-Nov-2014.csv')


def insertPlayer(cur):
    for team in teamIds:
        sql = "select TeamID from Team where StatsID = '%s'" % team
        cur.execute(sql)
        TeamID = cur.fetchone()[0]

        for season in seasons:
            sql = "select SeasonID from Season where Season = '%s' and Season_SeasonTypeID = 2" % season
            cur.execute(sql)
            SeasonID = cur.fetchone()[0]

            matrix = loadMatrixFromFile(basePath + team + '.' + season +
                                        '.player.csv')
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 24/DEC/2014
# File: NBAStatsTeamPlayerDataProcessor.py
# Desc: the data downloaded from the net isn't clean, so this file
#   processes it before it is used in models
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, saveMatrixToFile, readmatricefromfile, loadSeasons, loadTeamIds, saveLstToFile

# Data directory that the file names below are resolved against.
DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
# Season and team-id lists, loaded once when the module is imported.
seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')
teamIds = loadTeamIds(DIR + 'teamidshortname.csv')


# noneWithAVG
# replace None with average value
#
# @param teamId
# @param season
# @return res list(list)
def noneWithAVG(teamId, season):
    DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
    mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv")
    if len(mat) == 0:
        return [[]]

    heights = []
    weights = []
    ages = []
Пример #15
0
    tmpLst.append(item['PTS'])

    tmpLst.append(ranking['PPG'])
    tmpLst.append(ranking['RPG'])
    tmpLst.append(ranking['APG'])
    tmpLst.append(ranking['OPPG'])

    tmpLst.append(profile['Height'])
    tmpLst.append(profile['Weight'])
    tmpLst.append(profile['Age'])

    appendlst2file(tmpLst, dataFile)

if __name__ == '__main__':
  # first, load team id
  teamIds = loadTeamIds('../../data/basketball/teamidname-18-Nov-2014.csv')

  # second, load seasons
  seasons = loadSeasons('../../data/basketball/seasons.txt')

  # seasonTypes
  seasonTypes = ['Regular Season']

  # leagueId
  leagueId = "00"

  # for teamId in teamIds:
  #   dataFile = '../../data/basketball/' + teamId + '.csv'
  #   for t in seasonTypes:
  #     for s in seasons:
  #       print "Processing " + teamId + " " + s + " " + t,
#!/usr/bin/env python
# coding = utf-8
# Author: Archer Reilly
# Date: 24/DEC/2014
# File: NBAStatsTeamPlayerDataProcessor.py
# Desc: the data downloaded from the net isn't clean, so this file
#   processes it before it is used in models
#
# Produced By CSRGXTU
from Utility import loadMatrixFromFile, saveMatrixToFile, readmatricefromfile, loadSeasons, loadTeamIds, saveLstToFile

# Data directory that the file names below are resolved against.
DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
# Season and team-id lists, loaded once when the module is imported.
seasons = loadSeasons(DIR + 'seasons-18-Nov-2014.txt')
teamIds = loadTeamIds(DIR + 'teamidshortname.csv')

# noneWithAVG
# replace None with average value
#
# @param teamId
# @param season
# @return res list(list)
def noneWithAVG(teamId, season):
  DIR = '/home/archer/Documents/maxent/data/basketball/leaguerank/'
  mat = loadMatrixFromFile(DIR + teamId + "." + season + ".player.csv")
  if len(mat) == 0:
    return [[]]

  heights = []
  weights = []
  ages = []
  exps = []
Пример #17
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Archer
# File: InsertTeamStats.py
# Date: 05/Jun/2015
# Desc: insert NBA.TeamStats table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds

# Module-level lookup data, loaded once when the module is imported.
teamIds = loadTeamIds(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv'
)
seasons = loadSeasons(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt'
)
# Rows of this matrix are searched by findId(): column 1 is compared with a
# short name and column 0 is returned as the id.
TeamID2TeamShortNames = loadMatrixFromFile(
    '/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv'
)


def findId(shortName):
    """Return the id paired with ``shortName`` in ``TeamID2TeamShortNames``.

    The table is scanned in order; the first row whose column 1 equals
    ``shortName`` wins and its column 0 is returned. ``False`` is returned
    when no row matches.
    """
    matches = (row[0] for row in TeamID2TeamShortNames if row[1] == shortName)
    return next(matches, False)

Пример #18
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Archer
# File: InsertTeamStats.py
# Date: 05/Jun/2015
# Desc: insert NBA.TeamStats table
#
# Produced By CSRGXTU
import MySQLdb as mdb
import sys
from Utility import loadMatrixFromFile, loadSeasons, loadTeamIds

# Module-level lookup data, loaded once when the module is imported.
teamIds = loadTeamIds('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/teamidshortname.csv')
seasons = loadSeasons('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/seasons-18-Nov-2014.txt')
# Searched by findId(): column 1 is the short name, column 0 the returned id.
TeamID2TeamShortNames = loadMatrixFromFile('/home/archer/Documents/Python/maxent/data/basketball/leaguerank/TeamID2TeamShortName.csv')

def findId(shortName):
    """Look up a team id by short name.

    Walks ``TeamID2TeamShortNames`` from the top and returns column 0 of
    the first row whose column 1 equals ``shortName``; returns ``False``
    if the table holds no such row.
    """
    for entry in TeamID2TeamShortNames:
        if entry[1] != shortName:
            continue
        return entry[0]
    return False

def isHome(matchUpString):
    """Return 1 for a home game, 0 otherwise.

    A matchup string of the form 'AAA vs. BBB' denotes a home game and
    'AAA @ BBB' an away game. The previous implementation returned 1 when
    '@' was present, i.e. it flagged away games as home games. The test
    now matches the 'vs.' check used to derive the HOME feature in the
    data-generation code of this project.

    @param matchUpString  matchup text, e.g. 'ATL vs. BOS' or 'ATL @ BOS'
    @return 1 if the string contains 'vs.', else 0 (including strings
            that contain neither marker)
    """
    return 1 if 'vs.' in matchUpString else 0

def none20(lst):
    for i in range(len(lst)):
Пример #19
0
    tmpLst.append(item['PTS'])

    tmpLst.append(ranking['PPG'])
    tmpLst.append(ranking['RPG'])
    tmpLst.append(ranking['APG'])
    tmpLst.append(ranking['OPPG'])

    tmpLst.append(profile['Height'])
    tmpLst.append(profile['Weight'])
    tmpLst.append(profile['Age'])

    appendlst2file(tmpLst, dataFile)

if __name__ == '__main__':
  # first, load team id
  teamIds = loadTeamIds('../../data/basketball/leaguerank/teamidname-18-Nov-2014.csv')

  # second, load seasons
  seasons = loadSeasons('../../data/basketball/leaguerank/seasons-18-Nov-2014.txt')

  # seasonTypes
  seasonTypes = ['Playoffs']

  # leagueId
  leagueId = "00"

  """
  for teamId in teamIds:
    dataFile = '../../data/basketball/leaguerank/' + teamId + '.playoff.csv'
    for t in seasonTypes:
      for s in seasons: