def game_filter(csv_file):
    """Return the IDs of games that were close late in the fourth quarter.

    Loads a play-by-play CSV (one of the EightThirtyFour data sets), keeps
    the rows with ``PERIOD == 4`` whose converted ``PCTIMESTRING`` value is
    at most 6.5, and for each game looks at the first remaining row that
    carries a score.  The game is kept when the two numbers of that
    ``"A-B"`` score string differ by 10 points or fewer.

    Games with no scored row inside that window are skipped, because no
    margin can be computed for them.

    Args:
        csv_file: Path of the CSV file, passed to ``Table.read_table``.

    Returns:
        A NumPy array holding the ``GAME_ID`` of every qualifying game.
    """
    pbp = Table().read_table(csv_file)
    unique_games = pbp.group('GAME_ID').column(0)
    print(unique_games)

    last_quarter = pbp.where('PERIOD', predicates.are.equal_to(4))
    # NOTE(review): time_string_to_number is defined elsewhere; presumably it
    # turns the clock string into minutes remaining -- confirm.
    transformed_minutes = last_quarter.apply(
        time_string_to_number, 'PCTIMESTRING')
    last_quarter_and_minutes = last_quarter.with_column(
        'TIME', transformed_minutes)
    # The cutoff is 6.5, not 6: rows with TIME up to and including 6.5 count.
    late_plays = last_quarter_and_minutes.where(
        'TIME', predicates.are.below_or_equal_to(6.5))

    # Collect in a list and build the array once; np.append inside the loop
    # would copy the whole array on every hit.
    close_ids = []
    for game in unique_games:
        game_scores_only = (
            late_plays
            .where('GAME_ID', predicates.are.equal_to(game))
            .select('TIME', 'SCORE')
            .where('SCORE', predicates.are.not_equal_to('nan'))
        )
        # row(0) raises IndexError on an empty table, so skip games that have
        # no scored play in the window instead of aborting the whole scan.
        if game_scores_only.num_rows == 0:
            continue
        # First scored row in file order within the window.
        score = game_scores_only.row(0).item(1)
        t1, t2 = score.split('-')
        if abs(int(t1) - int(t2)) <= 10:
            close_ids.append(game)

    # Appending onto make_array() yields the same dtype the original
    # one-at-a-time np.append calls produced.
    return np.append(make_array(), close_ids)
new_full_names = [] # from last,first to first,last for name in full_names: if ',' in name: first_name = name.split(',')[1].strip() last_name = name.split(',')[0].strip() new_full_names.append(first_name + " " + last_name) else: new_full_names.append(name) # compile into a table books = Table().with_columns("SEMESTER", semesters, "SECTIONS", section, "CLASS NAME", class_name, "BOOK TITLES", book_names, "AUTHOR", new_full_names) grouped = books.group(["SEMESTER", "AUTHOR"]) print(grouped) # "calculate" genders from Wikipedia articles gender = [] seen = {} # memoization: author -> gender wiki = WikiApi() for author in grouped.column("AUTHOR"): if author.lower() in seen: print(author, "already found previously") gender.append(seen[author.lower()]) continue try: try: print("trying to find " + author + " in wikipedia")