def getSeasonsValues(event, seasons):
    """Load the comparable values of `event` for one or several seasons.

    `seasons` is either a single season name (a string) or an iterable of
    season names.  Each season is read with
    `GameDataPoss.readSeasonEventComparables(season, event)`; the results of
    several seasons are joined with `np.concatenate` (default axis).

    Returns a `Values` object built from the data, with `passes` set when
    `event == 'pass'` and `shot` set when `event == 'shot'`.  An empty
    iterable of seasons yields `Values([], ...)`.
    """
    if isinstance(seasons, basestring):
        v = GameDataPoss.readSeasonEventComparables(seasons, event)
    else:
        per_season = [GameDataPoss.readSeasonEventComparables(s, event)
                      for s in seasons]
        if not per_season:
            v = []
        elif len(per_season) == 1:
            # A single season is passed through untouched (no copy).
            v = per_season[0]
        else:
            # np.concatenate expects all arrays in ONE sequence argument.
            # Passing them as two positional arguments makes the second
            # array the `axis` parameter and raises TypeError.
            v = np.concatenate(per_season)
    return Values(v, passes=event == 'pass', shot=event == 'shot')
def getSeasonsValues(event, seasons):
    """Load the comparable values of `event` for one or several seasons.

    `seasons` is either a single season name (a string) or an iterable of
    season names.  Each season is read with
    `GameDataPoss.readSeasonEventComparables(season, event)`; the results of
    several seasons are joined with `np.concatenate` (default axis).

    Returns a `Values` object built from the data, with `passes` set when
    `event == 'pass'` and `shot` set when `event == 'shot'`.  An empty
    iterable of seasons yields `Values([], ...)`.
    """
    if isinstance(seasons, basestring):
        v = GameDataPoss.readSeasonEventComparables(seasons, event)
    else:
        per_season = [GameDataPoss.readSeasonEventComparables(s, event)
                      for s in seasons]
        if not per_season:
            v = []
        elif len(per_season) == 1:
            # A single season is passed through untouched (no copy).
            v = per_season[0]
        else:
            # np.concatenate expects all arrays in ONE sequence argument.
            # Passing them as two positional arguments makes the second
            # array the `axis` parameter and raises TypeError.
            v = np.concatenate(per_season)
    return Values(v, passes=event == 'pass', shot=event == 'shot')
def getAllTeamValues(event, seasons):
    """Collect the `event` values of every team in the given seasons.

    For each season in `seasons`, the team ids come from
    `GameDataPoss.getSeasonTeamIDs` and each team's values are loaded with
    `getTeamValues`.

    Returns a dict mapping team id -> values.  A team id that occurs in more
    than one season keeps only the values of the last season processed.
    """
    values_by_team = {}
    for current_season in seasons:
        for team_id in GameDataPoss.getSeasonTeamIDs(current_season):
            values_by_team[team_id] = getTeamValues(
                event, team_id, current_season)
    return values_by_team
def plot_histo2d_all_teams(event, season, goals = False, outcome = False): print 'plotting hist2d for teams' teams = GameDataPoss.getSeasonTeamIDs(season) for t in teams: print 'Getting team', t v = getTeamValues(event, t, season) data = (v.getValues('x'), v.getValues('y')) fname = t + '_' + event + '-hist2d.png' histo2d(data, ranged = False, title = event.capitalize() + ' frequencies for team ' + t, numBins = get_bins(), save = True, filename = fname)
def plot_histo_passes_all_teams(season, save = False): print 'plotting hist for all teams' teams = GameDataPoss.getSeasonTeamIDs(season) for t in teams: print 'Team', t v = getTeamValues('pass', t, season) all_distances = v.getValues('distance') outcomes = v.getValues('outcome') success = all_distances[outcomes == 1] data = [all_distances, success] labels = ['All', 'Successful'] multiHisto(data, labels, numBins = 20, title = 'Distances of Passes for ' + str(t), xlabel = 'Approximate Distance in yards')
def draw_season_graph(season='liga12'):
    """Draw the team graph built from a season's saved average distances.

    Loads `DATA_PATH + season + '-average-distances.txt'` as a
    comma-separated float matrix, builds a graph with `coloured_edge_graph`,
    lays it out with `get_position` and renders it with `draw_colored_graph`.

    Returns the graph object.
    """
    distance_file = DATA_PATH + season + '-average-distances.txt'
    distance_matrix = np.loadtxt(distance_file, delimiter=',', dtype='float')
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    team_lookup = get_team_dict(season)
    # Alternative builder kept for reference:
    #   create_distance_graph(team_ids, distance_matrix)
    graph = coloured_edge_graph(team_ids, distance_matrix, team_lookup)
    layout = get_position(graph, team_lookup)
    draw_colored_graph(graph, layout)
    return graph
def plot_histo2d_all_teams(event, season, goals=False, outcome=False): print 'plotting hist2d for teams' teams = GameDataPoss.getSeasonTeamIDs(season) for t in teams: print 'Getting team', t v = getTeamValues(event, t, season) data = (v.getValues('x'), v.getValues('y')) fname = t + '_' + event + '-hist2d.png' histo2d(data, ranged=False, title=event.capitalize() + ' frequencies for team ' + t, numBins=get_bins(), save=True, filename=fname)
def plot_histo_passes_all_teams(season, save=False): print 'plotting hist for all teams' teams = GameDataPoss.getSeasonTeamIDs(season) for t in teams: print 'Team', t v = getTeamValues('pass', t, season) all_distances = v.getValues('distance') outcomes = v.getValues('outcome') success = all_distances[outcomes == 1] data = [all_distances, success] labels = ['All', 'Successful'] multiHisto(data, labels, numBins=20, title='Distances of Passes for ' + str(t), xlabel='Approximate Distance in yards')
def clustering_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Cluster the team feature samples of a season with K-Means.

    Features and their team labels come from `get_team_histo2d_features`;
    K-Means is run with one cluster per team of the season.

    `NUM_EXPERIMENTS` and `NUM_SAMPLES` are accepted but not used in this
    body (how many samples are drawn is decided by the feature helper).

    Returns `(labels, y, label_dict)` where `labels` are the predicted
    cluster ids, `y` the team labels from the feature helper, and
    `label_dict` maps each cluster id to the list of `y` entries assigned
    to it.
    """
    team_data = getAllTeamValues(event, [season])
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    cluster_count = len(team_ids)
    X, y = get_team_histo2d_features(team_data, team_ids)
    labels = KMeans(n_clusters=cluster_count).fit_predict(X)
    label_dict = {}
    for idx, cluster_id in enumerate(labels):
        label_dict.setdefault(cluster_id, []).append(y[idx])
    return labels, y, label_dict
def distances_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Average the team-to-team sample distances over repeated experiments.

    Each of the `NUM_EXPERIMENTS` rounds builds features with
    `get_team_histo2d_features`, turns them into a distance matrix with
    `create_distance_matrix` and reduces that to a per-team-pair average via
    `get_average_distances_between_teams(..., num_teams, NUM_SAMPLES)`.
    The rounds are summed and divided by `NUM_EXPERIMENTS`.

    The stdlib `random` generator is seeded with 0 first.  The result is
    written with `saveArrayAsCsv` to '<season>-average-distances.txt'
    (precision 10) and returned as a (num_teams, num_teams) array.
    """
    random.seed(0)
    team_data = getAllTeamValues(event, [season])
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    team_count = len(team_ids)
    running_total = np.zeros((team_count, team_count))
    for _ in xrange(NUM_EXPERIMENTS):
        X, y = get_team_histo2d_features(team_data, team_ids)
        pairwise = create_distance_matrix(X)
        round_average = get_average_distances_between_teams(
            pairwise, team_count, NUM_SAMPLES)
        running_total = running_total + round_average
    averages = running_total / float(NUM_EXPERIMENTS)
    saveArrayAsCsv(averages, season + '-average-distances.txt', precision=10)
    return averages
def clustering_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Cluster the team feature samples of a season with K-Means.

    Features and their team labels come from `get_team_histo2d_features`;
    K-Means is run with one cluster per team of the season.

    `NUM_EXPERIMENTS` and `NUM_SAMPLES` are accepted but not used in this
    body (how many samples are drawn is decided by the feature helper).

    Returns `(labels, y, label_dict)` where `labels` are the predicted
    cluster ids, `y` the team labels from the feature helper, and
    `label_dict` maps each cluster id to the list of `y` entries assigned
    to it.
    """
    team_data = getAllTeamValues(event, [season])
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    cluster_count = len(team_ids)
    X, y = get_team_histo2d_features(team_data, team_ids)
    labels = KMeans(n_clusters=cluster_count).fit_predict(X)
    label_dict = {}
    for idx, cluster_id in enumerate(labels):
        label_dict.setdefault(cluster_id, []).append(y[idx])
    return labels, y, label_dict
def knn_classification(event, seasons): """ Randomly resamples with replacement a season's worth of events for each team. Calculates the 2d histogram of each sample to create a feature vector, then performs knn classification """ # Get Teams, Data print 'Getting data' data_teams = getAllTeamValues(event, seasons) teams = [] for s in seasons: teams += GameDataPoss.getSeasonTeamIDs(s) print 'Splitting data' X, y = get_team_histo2d_features(data_teams, teams) # X,y = get_team_histo_pass_features(data_teams, teams) print X.shape print y.shape X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3) # Number of Neighbours print 'Classifying' x_predict = knn_classify(X_train, X_test, y_train, y_test, k = 5) return X, y, x_predict, X_test, y_test, X_train, y_train
def knn_classification(event, seasons): """ Randomly resamples with replacement a season's worth of events for each team. Calculates the 2d histogram of each sample to create a feature vector, then performs knn classification """ # Get Teams, Data print 'Getting data' data_teams = getAllTeamValues(event, seasons) teams = [] for s in seasons: teams += GameDataPoss.getSeasonTeamIDs(s) print 'Splitting data' X, y = get_team_histo2d_features(data_teams, teams) # X,y = get_team_histo_pass_features(data_teams, teams) print X.shape print y.shape X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3) # Number of Neighbours print 'Classifying' x_predict = knn_classify(X_train, X_test, y_train, y_test, k=5) return X, y, x_predict, X_test, y_test, X_train, y_train
def distances_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Average the team-to-team sample distances over repeated experiments.

    Each of the `NUM_EXPERIMENTS` rounds builds features with
    `get_team_histo2d_features`, turns them into a distance matrix with
    `create_distance_matrix` and reduces that to a per-team-pair average via
    `get_average_distances_between_teams(..., num_teams, NUM_SAMPLES)`.
    The rounds are summed and divided by `NUM_EXPERIMENTS`.

    The stdlib `random` generator is seeded with 0 first.  The result is
    written with `saveArrayAsCsv` to '<season>-average-distances.txt'
    (precision 10) and returned as a (num_teams, num_teams) array.
    """
    random.seed(0)
    team_data = getAllTeamValues(event, [season])
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    team_count = len(team_ids)
    running_total = np.zeros((team_count, team_count))
    for _ in xrange(NUM_EXPERIMENTS):
        X, y = get_team_histo2d_features(team_data, team_ids)
        pairwise = create_distance_matrix(X)
        round_average = get_average_distances_between_teams(
            pairwise, team_count, NUM_SAMPLES)
        running_total = running_total + round_average
    averages = running_total / float(NUM_EXPERIMENTS)
    saveArrayAsCsv(averages, season + '-average-distances.txt', precision=10)
    return averages
def getGameValues(event, game, season):
    """Return the `event` values of a single game wrapped in `Values`.

    The game is read with `GameDataPoss.getGameAsSplitValues(season, game)`
    and the entry stored under `event` is selected.  The `passes` / `shot`
    flags of `Values` are set when `event` is 'pass' / 'shot' respectively.
    """
    split_values = GameDataPoss.getGameAsSplitValues(season, game)
    event_values = split_values[event]
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(event_values, passes=is_pass, shot=is_shot)
def getTeamValues(event, team, season):
    """Return the `event` values of one team in one season wrapped in `Values`.

    The data is read with
    `GameDataPoss.readTeamEventSplitValues(season, team, event)`.  The
    `passes` / `shot` flags of `Values` are set when `event` is 'pass' /
    'shot' respectively.
    """
    team_events = GameDataPoss.readTeamEventSplitValues(season, team, event)
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(team_events, passes=is_pass, shot=is_shot)
def knn_classification_experiment(season, NUM_EXPERIMENTS = 200): np.random.seed(0) data_teams = getAllTeamValues('pass', [season]) teams = GameDataPoss.getSeasonTeamIDs(season) num_passes = {} total_passes = 0 for t in teams: team_values = data_teams[t] team_values = team_values.getSlicedValues(['x', 'y']) num_passes[t] = team_values.shape[0] team_values = team_values[np.where(team_values[:,0] >= 38.33333)] team_values = team_values[np.where(team_values[:,0] <= 76.66667)] num_passes[t] = (num_passes[t] - team_values.shape[0]) / float(num_passes[t]) # total_passes += team_values.shape[0] print num_passes return num_passes accuracy = [] confusion = [] precision = [] recall = [] k = [] tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}] # tuned_params = [{'n_neighbors': [1, 2, 3]}] for i in range(10): X, y = get_team_histo2d_features(data_teams, teams) for i in range(NUM_EXPERIMENTS): # print '-------------Experiment #', i, '-----------------' X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3) clf = GridSearchCV(KNeighborsClassifier(weights='distance'), param_grid = tuned_params, cv = cross_validation.StratifiedKFold(y_train)) # clf = GridSearchCV(KNeighborsClassifier(), param_grid = tuned_params) clf.fit(X_train, y_train) y_true, y_pred = y_test, clf.predict(X_test) acc = clf.score(X_test, y_test) # print(classification_report(y_true, y_pred)) # print acc # print clf.grid_scores_ # print clf.best_params_ # print confusion_matrix(y_true, y_pred) p = precision_score(y_true, y_pred, average=None) r = recall_score(y_true, y_pred, average=None) precision.append(p) recall.append(r) k.append(clf.best_params_['n_neighbors']) accuracy.append(acc) confusion.append(confusion_matrix(y_true, y_pred)) c = confusion[0] for c_matrix in confusion[1:]: c = c + c_matrix c = c / float(NUM_EXPERIMENTS * 10) # saveArrayAsCsv(c, season + '_midfield_confusion_matrix.txt') return np.array(accuracy), c, np.array(precision), np.array(recall), np.array(k)
def getTeamValues(event, team, season):
    """Return the `event` values of one team in one season wrapped in `Values`.

    The data is read with
    `GameDataPoss.readTeamEventSplitValues(season, team, event)`.  The
    `passes` / `shot` flags of `Values` are set when `event` is 'pass' /
    'shot' respectively.
    """
    team_events = GameDataPoss.readTeamEventSplitValues(season, team, event)
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(team_events, passes=is_pass, shot=is_shot)
def getGameValues(event, game, season):
    """Return the `event` values of a single game wrapped in `Values`.

    The game is read with `GameDataPoss.getGameAsSplitValues(season, game)`
    and the entry stored under `event` is selected.  The `passes` / `shot`
    flags of `Values` are set when `event` is 'pass' / 'shot' respectively.
    """
    split_values = GameDataPoss.getGameAsSplitValues(season, game)
    event_values = split_values[event]
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(event_values, passes=is_pass, shot=is_shot)
def knn_classification_experiment(season, NUM_EXPERIMENTS=200): np.random.seed(0) data_teams = getAllTeamValues('pass', [season]) teams = GameDataPoss.getSeasonTeamIDs(season) num_passes = {} total_passes = 0 for t in teams: team_values = data_teams[t] team_values = team_values.getSlicedValues(['x', 'y']) num_passes[t] = team_values.shape[0] team_values = team_values[np.where(team_values[:, 0] >= 38.33333)] team_values = team_values[np.where(team_values[:, 0] <= 76.66667)] num_passes[t] = (num_passes[t] - team_values.shape[0]) / float( num_passes[t]) # total_passes += team_values.shape[0] print num_passes return num_passes accuracy = [] confusion = [] precision = [] recall = [] k = [] tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}] # tuned_params = [{'n_neighbors': [1, 2, 3]}] for i in range(10): X, y = get_team_histo2d_features(data_teams, teams) for i in range(NUM_EXPERIMENTS): # print '-------------Experiment #', i, '-----------------' X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3) clf = GridSearchCV(KNeighborsClassifier(weights='distance'), param_grid=tuned_params, cv=cross_validation.StratifiedKFold(y_train)) # clf = GridSearchCV(KNeighborsClassifier(), param_grid = tuned_params) clf.fit(X_train, y_train) y_true, y_pred = y_test, clf.predict(X_test) acc = clf.score(X_test, y_test) # print(classification_report(y_true, y_pred)) # print acc # print clf.grid_scores_ # print clf.best_params_ # print confusion_matrix(y_true, y_pred) p = precision_score(y_true, y_pred, average=None) r = recall_score(y_true, y_pred, average=None) precision.append(p) recall.append(r) k.append(clf.best_params_['n_neighbors']) accuracy.append(acc) confusion.append(confusion_matrix(y_true, y_pred)) c = confusion[0] for c_matrix in confusion[1:]: c = c + c_matrix c = c / float(NUM_EXPERIMENTS * 10) # saveArrayAsCsv(c, season + '_midfield_confusion_matrix.txt') return np.array(accuracy), c, np.array(precision), np.array( recall), np.array(k)