Пример #1
0
def getSeasonsValues(event, seasons):
  """Load the comparables of one event type for one or several seasons.

  Args:
    event: event name handed to GameDataPoss.readSeasonEventComparables;
      'pass' and 'shot' additionally switch on the matching Values flag.
    seasons: a single season name (a string) or an iterable of season names.

  Returns:
    A Values object wrapping the loaded data. With several seasons the
    per-season arrays are joined along the first axis; with an empty
    iterable Values receives an empty list.
  """
  if isinstance(seasons, basestring):
    # A bare string is one season, not an iterable of season names.
    v = GameDataPoss.readSeasonEventComparables(seasons, event)
  else:
    per_season = [GameDataPoss.readSeasonEventComparables(s, event)
                  for s in seasons]
    if not per_season:
      v = []
    elif len(per_season) == 1:
      # A lone season is passed through untouched.
      v = per_season[0]
    else:
      # np.concatenate expects ONE sequence of arrays; its second positional
      # argument is the axis, so the arrays must not be passed separately.
      v = np.concatenate(per_season)
  return Values(v, passes = event == 'pass', shot = event == 'shot')
Пример #2
0
def getSeasonsValues(event, seasons):
    """Build a Values object for one event type over one or more seasons.

    Args:
        event: event name forwarded to
            GameDataPoss.readSeasonEventComparables; the values 'pass' and
            'shot' also set the corresponding Values keyword flag.
        seasons: either one season name (string) or an iterable of them.

    Returns:
        Values wrapping the season data. Data of multiple seasons is
        concatenated along axis 0; an empty iterable yields Values([]).
    """
    if isinstance(seasons, basestring):
        v = GameDataPoss.readSeasonEventComparables(seasons, event)
        return Values(v, passes=event == 'pass', shot=event == 'shot')

    chunks = []
    for s in seasons:
        chunks.append(GameDataPoss.readSeasonEventComparables(s, event))

    if len(chunks) > 1:
        # The arrays have to be given as a single sequence: a second
        # positional argument to np.concatenate is interpreted as the axis.
        v = np.concatenate(chunks)
    elif chunks:
        # Exactly one season: hand its data over unchanged.
        v = chunks[0]
    else:
        v = []
    return Values(v, passes=event == 'pass', shot=event == 'shot')
Пример #3
0
def getAllTeamValues(event, seasons):
  """Map every team ID of the given seasons to its getTeamValues result.

  Seasons are processed in order. A team ID listed by more than one season
  keeps only the entry produced for the last such season.
  """
  return {
    team_id: getTeamValues(event, team_id, season)
    for season in seasons
    for team_id in GameDataPoss.getSeasonTeamIDs(season)
  }
Пример #4
0
def getAllTeamValues(event, seasons):
    """Gather getTeamValues(event, team, season) for each team of each season.

    Returns a dict keyed by team ID. When the same team ID shows up in
    several seasons, the value from the season processed last wins.
    """
    by_team = {}
    for current_season in seasons:
        by_team.update(
            (team_id, getTeamValues(event, team_id, current_season))
            for team_id in GameDataPoss.getSeasonTeamIDs(current_season))
    return by_team
Пример #5
0
def plot_histo2d_all_teams(event, season, goals = False, outcome = False):
  """Call histo2d on the x/y values of one event type for each team.

  For every team ID of `season` the team's 'x' and 'y' values are passed to
  histo2d with save = True and the file name '<team>_<event>-hist2d.png'.
  The `goals` and `outcome` parameters are accepted but not used.
  """
  print('plotting hist2d for teams')
  for team_id in GameDataPoss.getSeasonTeamIDs(season):
    print('Getting team %s' % (team_id,))
    team_events = getTeamValues(event, team_id, season)
    xy = (team_events.getValues('x'), team_events.getValues('y'))
    target = team_id + '_' + event + '-hist2d.png'
    histo2d(
      xy,
      ranged = False,
      title = event.capitalize() + ' frequencies for team ' + team_id,
      numBins = get_bins(),
      save = True,
      filename = target,
    )
Пример #6
0
def plot_histo_passes_all_teams(season, save = False):
  """Call multiHisto on the pass distances of every team of a season.

  Each team gets two series: all pass 'distance' values, and the subset
  whose 'outcome' value equals 1 (labelled 'Successful').
  The `save` parameter is accepted but not used.
  """
  print('plotting hist for all teams')
  for team_id in GameDataPoss.getSeasonTeamIDs(season):
    print('Team %s' % (team_id,))
    passes = getTeamValues('pass', team_id, season)
    distances = passes.getValues('distance')
    completed = distances[passes.getValues('outcome') == 1]
    multiHisto(
      [distances, completed],
      ['All', 'Successful'],
      numBins = 20,
      title = 'Distances of Passes for ' + str(team_id),
      xlabel = 'Approximate Distance in yards',
    )
Пример #7
0
def draw_season_graph(season='liga12'):
    """Build, draw and return the coloured-edge team graph of a season.

    The distance matrix is read as comma-delimited floats from
    DATA_PATH + season + '-average-distances.txt'.
    """
    distances = np.loadtxt(DATA_PATH + season + '-average-distances.txt',
                           delimiter=',',
                           dtype='float')
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    team_lookup = get_team_dict(season)

    graph = coloured_edge_graph(team_ids, distances, team_lookup)
    layout = get_position(graph, team_lookup)
    draw_colored_graph(graph, layout)
    return graph
Пример #8
0
def plot_histo2d_all_teams(event, season, goals=False, outcome=False):
    """Run histo2d once per team on that team's x/y values for `event`.

    histo2d is called with save=True and a file name of the form
    '<team>_<event>-hist2d.png'. Neither `goals` nor `outcome` is used in
    the body; both are kept in the signature.
    """
    print('plotting hist2d for teams')
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    for team in team_ids:
        print('Getting team %s' % (team,))
        values = getTeamValues(event, team, season)
        xs = values.getValues('x')
        ys = values.getValues('y')
        out_name = team + '_' + event + '-hist2d.png'
        plot_title = event.capitalize() + ' frequencies for team ' + team
        histo2d((xs, ys), ranged=False, title=plot_title,
                numBins=get_bins(), save=True, filename=out_name)
Пример #9
0
def draw_season_graph(season='liga12'):
  """Draw the coloured team graph for a season and return the graph object.

  Loads the comma-separated float matrix stored in
  DATA_PATH + season + '-average-distances.txt' and passes it, with the
  season's team IDs and team dict, to coloured_edge_graph.
  """
  matrix_path = DATA_PATH + season + '-average-distances.txt'
  distance_matrix = np.loadtxt(matrix_path, delimiter=',', dtype='float')
  season_teams = GameDataPoss.getSeasonTeamIDs(season)
  names = get_team_dict(season)

  # create_distance_graph(teams, distances) was an earlier alternative here.
  G = coloured_edge_graph(season_teams, distance_matrix, names)
  draw_colored_graph(G, get_position(G, names))
  return G
Пример #10
0
def plot_histo_passes_all_teams(season, save=False):
    """Plot, per team, all pass distances against the outcome == 1 subset.

    For each team ID of `season`, multiHisto receives the full 'distance'
    series and the entries whose 'outcome' equals 1, labelled 'All' and
    'Successful'. `save` is not used in the body.
    """
    print('plotting hist for all teams')
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    for team in team_ids:
        print('Team %s' % (team,))
        pass_values = getTeamValues('pass', team, season)
        every_distance = pass_values.getValues('distance')
        is_success = pass_values.getValues('outcome') == 1
        series = [every_distance, every_distance[is_success]]
        names = ['All', 'Successful']
        plot_title = 'Distances of Passes for ' + str(team)
        multiHisto(series, names, numBins=20, title=plot_title,
                   xlabel='Approximate Distance in yards')
Пример #11
0
def clustering_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Cluster a season's team feature vectors with K-Means.

    Features and their targets come from get_team_histo2d_features; the
    number of clusters equals the number of team IDs in the season.
    NUM_EXPERIMENTS and NUM_SAMPLES are accepted but not used in the body.

    Returns:
        (labels, y, label_dict): the cluster label of each sample, the
        targets returned by get_team_histo2d_features, and a dict mapping
        each cluster label to the list of targets assigned to it.
    """
    data_teams = getAllTeamValues(event, [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)
    cluster_count = len(teams)

    X, y = get_team_histo2d_features(data_teams, teams)
    labels = KMeans(n_clusters=cluster_count).fit_predict(X)

    # Group the targets by the cluster each sample landed in.
    label_dict = {}
    for idx, cluster in enumerate(labels):
        label_dict.setdefault(cluster, []).append(y[idx])

    return labels, y, label_dict
Пример #12
0
def distances_experiment(season, event, NUM_EXPERIMENTS = 10, NUM_SAMPLES = 10):
  """Average the team-to-team distance matrix over repeated experiments.

  Each of the NUM_EXPERIMENTS rounds builds features with
  get_team_histo2d_features, turns them into a distance matrix, and reduces
  that to a (num_teams x num_teams) matrix via
  get_average_distances_between_teams (which receives NUM_SAMPLES). The
  rounds are averaged, written with saveArrayAsCsv to
  season + '-average-distances.txt', and returned.

  Python's `random` module is seeded with 0 at the start of the call.
  """
  random.seed(0)
  data_teams = getAllTeamValues(event, [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)
  num_teams = len(teams)

  running_total = np.zeros((num_teams, num_teams))
  for _round in xrange(NUM_EXPERIMENTS):
    features, _targets = get_team_histo2d_features(data_teams, teams)
    pairwise = create_distance_matrix(features)
    running_total = running_total + get_average_distances_between_teams(
      pairwise, num_teams, NUM_SAMPLES)

  averages = running_total / float(NUM_EXPERIMENTS)
  saveArrayAsCsv(averages, season + '-average-distances.txt', precision = 10)
  return averages
Пример #13
0
def clustering_experiment(season, event, NUM_EXPERIMENTS = 10, NUM_SAMPLES = 10):
  """Run K-Means on the team features of one season, one cluster per team.

  NUM_EXPERIMENTS and NUM_SAMPLES are part of the signature but are not
  read anywhere in the body.

  Returns a tuple (labels, y, label_dict): the predicted cluster labels,
  the targets from get_team_histo2d_features, and a dict from cluster label
  to the targets of the samples placed in that cluster.
  """
  data_teams = getAllTeamValues(event, [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)
  num_teams = len(teams)

  X, y = get_team_histo2d_features(data_teams, teams)
  estimator = KMeans(n_clusters = num_teams)
  labels = estimator.fit_predict(X)

  label_dict = {}
  for position, label in enumerate(labels):
    members = label_dict.setdefault(label, [])
    members.append(y[position])

  return labels, y, label_dict
Пример #14
0
def knn_classification(event, seasons):
  """Classify team feature vectors with knn_classify (k = 5).

  Features for all teams of all given seasons are produced by
  get_team_histo2d_features, split with create_test_split (test_size=0.3)
  and passed to knn_classify. Progress messages and the feature/target
  shapes are printed along the way.

  Returns:
    (X, y, x_predict, X_test, y_test, X_train, y_train), where x_predict is
    the value returned by knn_classify.
  """
  print('Getting data')
  data_teams = getAllTeamValues(event, seasons)
  teams = []
  for season in seasons:
    teams.extend(GameDataPoss.getSeasonTeamIDs(season))

  print('Splitting data')
  X, y = get_team_histo2d_features(data_teams, teams)
  print(X.shape)
  print(y.shape)
  X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

  print('Classifying')
  x_predict = knn_classify(X_train, X_test, y_train, y_test, k = 5)
  return X, y, x_predict, X_test, y_test, X_train, y_train
Пример #15
0
def knn_classification(event, seasons):
    """Run a 5-neighbour KNN classification on the teams of `seasons`.

    The team IDs of every season are collected into one list, features are
    built with get_team_histo2d_features, a 70/30 split is requested from
    create_test_split (test_size=0.3), and knn_classify is called with k=5.
    Status lines and the shapes of X and y are printed.

    Returns:
        The tuple (X, y, x_predict, X_test, y_test, X_train, y_train);
        x_predict is whatever knn_classify returns.
    """
    print('Getting data')
    data_teams = getAllTeamValues(event, seasons)
    teams = [team
             for season in seasons
             for team in GameDataPoss.getSeasonTeamIDs(season)]

    print('Splitting data')
    X, y = get_team_histo2d_features(data_teams, teams)
    for array in (X, y):
        print(array.shape)
    X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

    print('Classifying')
    x_predict = knn_classify(X_train, X_test, y_train, y_test, k=5)
    return X, y, x_predict, X_test, y_test, X_train, y_train
Пример #16
0
def distances_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Compute and save the mean inter-team distance matrix of a season.

    Seeds Python's `random` module with 0, then repeats NUM_EXPERIMENTS
    times: build features (get_team_histo2d_features), build their distance
    matrix (create_distance_matrix) and reduce it per team pair
    (get_average_distances_between_teams, given NUM_SAMPLES). The summed
    result is divided by NUM_EXPERIMENTS, saved through saveArrayAsCsv as
    season + '-average-distances.txt' and returned.
    """
    random.seed(0)
    data_teams = getAllTeamValues(event, [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)
    team_count = len(teams)

    accumulated = np.zeros((team_count, team_count))
    experiment = 0
    while experiment < NUM_EXPERIMENTS:
        X, _ = get_team_histo2d_features(data_teams, teams)
        per_pair = get_average_distances_between_teams(
            create_distance_matrix(X), team_count, NUM_SAMPLES)
        accumulated = accumulated + per_pair
        experiment += 1

    mean_distances = accumulated / float(NUM_EXPERIMENTS)
    saveArrayAsCsv(mean_distances,
                   season + '-average-distances.txt',
                   precision=10)
    return mean_distances
Пример #17
0
def getGameValues(event, game, season):
    """Return a Values object for one event type of a single game.

    The game's split values are looked up by `event`; the 'pass' and 'shot'
    event names set the matching Values flag.
    """
    split_values = GameDataPoss.getGameAsSplitValues(season, game)
    game_events = split_values[event]
    return Values(game_events, passes=(event == 'pass'), shot=(event == 'shot'))
Пример #18
0
def getTeamValues(event, team, season):
    """Return a Values object holding one team's events of the given type.

    Data is read with GameDataPoss.readTeamEventSplitValues; the 'pass' and
    'shot' event names set the matching Values flag.
    """
    team_events = GameDataPoss.readTeamEventSplitValues(season, team, event)
    flags = {'passes': event == 'pass', 'shot': event == 'shot'}
    return Values(team_events, **flags)
Пример #19
0
def knn_classification_experiment(season, NUM_EXPERIMENTS = 200):
  """Return, per team, the share of passes whose x lies outside [38.33333, 76.66667].

  Despite its name, this function has never run the KNN experiment: it
  returned the pass-share dict unconditionally, which left the experiment
  code below the return unreachable. That return value is kept so callers
  see no change; the formerly unreachable code now lives in
  _knn_grid_search_experiment.

  Args:
    season: season name used to load the pass data and the team IDs.
    NUM_EXPERIMENTS: not used here; kept for backward compatibility.

  Returns:
    dict mapping team ID to the fraction (float) of that team's passes with
    x < 38.33333 or x > 76.66667. The dict is also printed.

  Raises:
    ZeroDivisionError: presumably when a team has no passes at all
      (assumes the sliced values are a NumPy array - TODO confirm).
  """
  # Kept although nothing random happens here: it resets NumPy's global RNG,
  # which code running after this call may rely on.
  np.random.seed(0)

  data_teams = getAllTeamValues('pass', [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)

  num_passes = {}
  for t in teams:
    xy = data_teams[t].getSlicedValues(['x', 'y'])
    total = xy.shape[0]
    x = xy[:, 0]
    # NOTE(review): the bounds look like the middle third of a 0-115 x range
    # (presumably the midfield) - confirm against the pitch coordinates.
    inside = int(np.count_nonzero((x >= 38.33333) & (x <= 76.66667)))
    num_passes[t] = (total - inside) / float(total)
  print(num_passes)
  return num_passes


def _knn_grid_search_experiment(data_teams, teams, NUM_EXPERIMENTS = 200, num_resamples = 10):
  """Repeated KNN grid search; the code that used to be unreachable above.

  For each of `num_resamples` feature sets from get_team_histo2d_features,
  NUM_EXPERIMENTS train/test splits (test_size=0.3) are drawn. On each split
  a distance-weighted KNeighborsClassifier is tuned over n_neighbors 2..7.

  Returns:
    (accuracy, mean confusion matrix, precision, recall, best k) where all
    but the confusion matrix are arrays with one entry per split.

  Raises:
    IndexError: if no split is run (NUM_EXPERIMENTS or num_resamples is 0).
  """
  accuracy = []
  confusion = []
  precision = []
  recall = []
  k = []

  tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}]

  for _resample in range(num_resamples):
    X, y = get_team_histo2d_features(data_teams, teams)

    # Distinct loop names: the original reused `i` for both loops.
    for _trial in range(NUM_EXPERIMENTS):
      X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

      clf = GridSearchCV(KNeighborsClassifier(weights='distance'),
                         param_grid = tuned_params,
                         cv = cross_validation.StratifiedKFold(y_train))
      clf.fit(X_train, y_train)

      y_true, y_pred = y_test, clf.predict(X_test)
      acc = clf.score(X_test, y_test)
      precision.append(precision_score(y_true, y_pred, average=None))
      recall.append(recall_score(y_true, y_pred, average=None))
      k.append(clf.best_params_['n_neighbors'])
      accuracy.append(acc)
      confusion.append(confusion_matrix(y_true, y_pred))

  c = confusion[0]
  for c_matrix in confusion[1:]:
    c = c + c_matrix
  c = c / float(NUM_EXPERIMENTS * num_resamples)

  return np.array(accuracy), c, np.array(precision), np.array(recall), np.array(k)
Пример #20
0
def getTeamValues(event, team, season):
  """Wrap a team's events of one type (for one season) in a Values object.

  The events come from GameDataPoss.readTeamEventSplitValues. `passes` is
  True only for the 'pass' event and `shot` only for the 'shot' event.
  """
  team_events = GameDataPoss.readTeamEventSplitValues(season, team, event)
  is_pass = event == 'pass'
  is_shot = event == 'shot'
  return Values(team_events, passes = is_pass, shot = is_shot)
Пример #21
0
def getGameValues(event, game, season):
  """Wrap the events of one type from a single game in a Values object.

  GameDataPoss.getGameAsSplitValues supplies the game's data, which is then
  indexed by `event`. `passes` / `shot` are True for the 'pass' / 'shot'
  event respectively.
  """
  by_event = GameDataPoss.getGameAsSplitValues(season, game)
  selected = by_event[event]
  return Values(selected, passes = (event == 'pass'), shot = (event == 'shot'))
Пример #22
0
def knn_classification_experiment(season, NUM_EXPERIMENTS=200):
    """Compute each team's fraction of passes with x outside [38.33333, 76.66667].

    The function name is historical. An unconditional return of the
    pass-fraction dict made the KNN experiment that followed it unreachable.
    The return value is unchanged, so existing callers are unaffected; the
    experiment itself has been moved to _knn_grid_search_experiment.

    Args:
        season: season name used to load pass data and team IDs.
        NUM_EXPERIMENTS: unused by this function; kept so existing calls
            remain valid.

    Returns:
        dict of team ID -> float, the share of the team's passes whose x is
        below 38.33333 or above 76.66667. The dict is printed as well.

    Raises:
        ZeroDivisionError: presumably for a team without any passes
            (assumes the sliced values are a NumPy array - TODO confirm).
    """
    # Resets NumPy's global RNG; retained because later code may depend on
    # this side effect even though nothing random is done here.
    np.random.seed(0)

    data_teams = getAllTeamValues('pass', [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)

    num_passes = {}
    for t in teams:
        positions = data_teams[t].getSlicedValues(['x', 'y'])
        pass_count = positions.shape[0]
        x = positions[:, 0]
        # NOTE(review): the limits look like thirds of a 0-115 x range, i.e.
        # presumably the midfield band - confirm with the data definition.
        in_band = (x >= 38.33333) & (x <= 76.66667)
        outside = pass_count - int(np.count_nonzero(in_band))
        num_passes[t] = outside / float(pass_count)
    print(num_passes)
    return num_passes


def _knn_grid_search_experiment(data_teams, teams, NUM_EXPERIMENTS=200,
                                num_resamples=10):
    """Run the KNN grid-search experiment that was previously unreachable.

    `num_resamples` feature sets are drawn with get_team_histo2d_features;
    for each, NUM_EXPERIMENTS train/test splits (test_size=0.3) are made. On
    each split a distance-weighted KNeighborsClassifier is tuned over
    n_neighbors in 2..7 and scored on the held-out part.

    Returns:
        (accuracy, confusion, precision, recall, k): arrays with one entry
        per split, except `confusion`, which is the confusion matrix
        averaged over all splits.

    Raises:
        IndexError: when no split is run at all.
    """
    accuracy = []
    confusion = []
    precision = []
    recall = []
    k = []

    tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}]

    for _resample in range(num_resamples):
        X, y = get_team_histo2d_features(data_teams, teams)

        # The original used `i` for both loops; the names are now distinct.
        for _trial in range(NUM_EXPERIMENTS):
            X_train, X_test, y_train, y_test = create_test_split(
                X, y, test_size=0.3)

            clf = GridSearchCV(KNeighborsClassifier(weights='distance'),
                               param_grid=tuned_params,
                               cv=cross_validation.StratifiedKFold(y_train))
            clf.fit(X_train, y_train)

            y_true, y_pred = y_test, clf.predict(X_test)
            acc = clf.score(X_test, y_test)
            precision.append(precision_score(y_true, y_pred, average=None))
            recall.append(recall_score(y_true, y_pred, average=None))
            k.append(clf.best_params_['n_neighbors'])
            accuracy.append(acc)
            confusion.append(confusion_matrix(y_true, y_pred))

    c = confusion[0]
    for c_matrix in confusion[1:]:
        c = c + c_matrix
    c = c / float(NUM_EXPERIMENTS * num_resamples)

    return np.array(accuracy), c, np.array(precision), np.array(
        recall), np.array(k)