def test_make_array():
    """Check the length, contents and dtype of arrays from ``ds.make_array``."""
    # A single element still yields a one-element array.
    single = ds.make_array(0)
    assert len(single) == 1

    # All-integer input is summed and is expected to be stored as int64.
    integers = ds.make_array(2, 3, 4)
    assert sum(integers) == 9
    assert integers.dtype == "int64"

    # Three-character strings are expected to give a width-3 unicode dtype.
    words = ds.make_array("foo", "bar")
    assert words.dtype == "<U3"
def game_filter(csv_file):
    """Return the IDs of games that were close late in the fourth period.

    Reads a play-by-play CSV (one of the EightThirtyFour data sets), keeps
    the rows with ``PERIOD == 4`` whose ``PCTIMESTRING``, converted by
    ``time_string_to_number``, is at most 6.5, and for every game inspects
    the first such row that carries a score. A game is kept when the two
    sides of that ``SCORE`` string (formatted ``"<a>-<b>"``) differ by 10
    points or fewer.

    NOTE(review): the original comment described the window as "the last 6
    minutes of the game", but the threshold in the code is 6.5 -- confirm
    which is intended. The unit of ``TIME`` is presumably minutes remaining;
    verify against ``time_string_to_number``.

    Parameters
    ----------
    csv_file : str
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        The ``GAME_ID`` values of the games that passed the filter, appended
        to an empty ``make_array()``.

    Notes
    -----
    The unique game IDs are printed to stdout as a side effect. Games with
    no scored row inside the window are skipped instead of raising.
    """
    pbp = Table().read_table(csv_file)
    unique_games = pbp.group('GAME_ID').column(0)
    print(unique_games)

    # Restrict to the fourth period and attach the numeric clock value.
    last_quarter = pbp.where('PERIOD', predicates.are.equal_to(4))
    transformed_minutes = last_quarter.apply(
        time_string_to_number, 'PCTIMESTRING')
    last_quarter_and_minutes = last_quarter.with_column(
        'TIME', transformed_minutes)
    late_fourth = last_quarter_and_minutes.where(
        'TIME', predicates.are.below_or_equal_to(6.5))

    close_ids = []
    for game in unique_games:
        # Rows of this game inside the window that have a score recorded;
        # missing scores show up as the string 'nan'.
        game_scores_only = late_fourth.where(
            'GAME_ID', predicates.are.equal_to(game)).select(
                'TIME', 'SCORE').where(
                    'SCORE', predicates.are.not_equal_to('nan'))
        if game_scores_only.num_rows == 0:
            # No scored play in the window: nothing to judge, and row(0)
            # would raise on an empty table.
            continue
        score = game_scores_only.row(0).item(1)
        t1, t2 = score.split('-')
        if abs(int(t1) - int(t2)) <= 10:
            close_ids.append(game)

    # One append onto the empty array instead of re-allocating per game.
    return np.append(make_array(), close_ids)
def test_sample_proportions():
    """Check the shape and value range of ``ds.sample_proportions`` output."""
    # A large sample from a 50-way uniform distribution: one proportion per
    # category, and the proportions add up to 1 (up to rounding).
    uniform = ds.sample_proportions(1000, np.ones(50) / 50)
    assert len(uniform) == 50
    assert _round_eq(1, sum(uniform))

    # With a sample of size 2 every proportion must be 0, 0.5 or 1.
    # all(...) is required here: asserting the bare list would always pass,
    # because a non-empty list is truthy whatever it contains.
    assert all(
        x in (0, 0.5, 1)
        for x in ds.sample_proportions(2, ds.make_array(.2, .3, .5))
    )
def test_proportions_from_distribution():
    """Check ``ds.proportions_from_distribution`` on a uniform distribution."""
    t = ds.Table().with_column('probs', np.ones(50) / 50)
    u = ds.proportions_from_distribution(t, 'probs', 1000)

    # The input table is left with its single column and 50 rows ...
    assert t.num_columns == 1 and t.num_rows == 50
    # ... while the result has one extra column and the same number of rows.
    assert u.num_columns == 2 and u.num_rows == 50

    # The second column of the result holds the sampled proportions.
    uniform = u.column(1)
    assert len(uniform) == 50
    assert _round_eq(1, sum(uniform))

    # With a sample of size 2 every proportion must be 0, 0.5 or 1.
    # all(...) is required here: asserting the bare list would always pass,
    # because a non-empty list is truthy whatever it contains.
    # NOTE(review): this last check calls ds.sample_proportions directly,
    # not proportions_from_distribution -- confirm that is intended.
    assert all(
        x in (0, 0.5, 1)
        for x in ds.sample_proportions(2, ds.make_array(.2, .3, .5))
    )
def conditional_dist(self, label, given='', show_ev=False):
    """
    Build the table of conditional distributions of the variable ``label``.

    The table returned by ``both_marginals()`` is normalised with the
    ``conditional`` helper (down the columns when ``label`` is the Y
    variable, across the rows when it is the X variable) and relabelled so
    that each conditional reads ``'Dist. of <label> | <value>'`` and the
    last one reads ``'Marginal of <label>'``.

    Parameters
    ----------
    label : String
        Name of one of the two variables of the joint distribution.
    given : String, optional
        Not read by the body of this method.
    show_ev : bool, optional
        When True, the expected value of each distribution is added as an
        ``'EV'`` row (Y variable) or ``'EV'`` column (X variable).

    Returns
    -------
    JointDistribution Table

    Raises
    ------
    AssertionError
        If ``label`` is neither the X nor the Y variable name.

    Examples
    --------
    >>> coins = Table().values('Coin1', ['H', 'T'], 'Coin2', ['H','T']).probabilities(np.array([0.24, 0.36, 0.16,0.24])).to_joint()
    >>> coins.conditional_dist('Coin1', 'Coin2')
                              Coin1=H  Coin1=T  Sum
    Dist. of Coin1 | Coin2=H      0.6      0.4  1.0
    Dist. of Coin1 | Coin2=T      0.6      0.4  1.0
    Marginal of Coin1             0.6      0.4  1.0
    >>> coins.conditional_dist('Coin2', 'Coin1')
             Dist. of Coin2 | Coin1=H  Dist. of Coin2 | Coin1=T  Marginal of Coin2
    Coin2=H                       0.4                       0.4                0.4
    Coin2=T                       0.6                       0.6                0.6
    Sum                           1.0                       1.0                1.0
    """
    # TODO Refactor this function.
    if label == self._Y_column_label:
        marginals = self.both_marginals()
        # Keep every row label but call the final (marginal) row 'Sum'.
        row_labels = np.append(marginals.index[0:-1], 'Sum')
        table = marginals.apply(conditional, axis=0).set_index(row_labels)

        # Expected values use every row except the trailing 'Sum' row.
        probs = table.to_numpy()[:-1, :]
        domain = np.array([evaluate(name) for name in list(self.index)])
        exp_values = [
            sum(probs[:, col] * domain) for col in range(len(probs[0]))
        ]

        # Every column but the last is a conditional; the last is the
        # marginal of the Y variable.
        prefix = 'Dist. of {0} | '.format(self._Y_column_label)
        headers = [prefix + name for name in table.columns[:-1]]
        headers.append('Marginal of {0}'.format(self._Y_column_label))
        table.columns = np.append(make_array(), headers)

        if show_ev:
            table.loc['EV'] = exp_values
        return table

    if label == self._X_column_label:
        marginals = self.both_marginals()
        normalised = marginals.apply(conditional, axis=1)
        sum_header = 'Sum: Marginal of {0}'.format(self._Y_column_label)
        table = normalised.rename(columns={sum_header: 'Sum'})

        # Expected values use every column except the trailing 'Sum' column.
        probs = table.to_numpy()[:, :-1]
        domain = np.array([evaluate(name) for name in list(self)])
        exp_values = [sum(row * domain) for row in probs]

        # Every row but the last is a conditional; the last is the marginal
        # of the X variable.
        prefix = 'Dist. of {0} | '.format(self._X_column_label)
        row_names = [prefix + name for name in marginals.index[:-1]]
        row_names.append('Marginal of {0}'.format(self._X_column_label))
        relabelled = table.set_index(np.append(make_array(), row_names))

        if show_ev:
            relabelled['EV'] = exp_values
        return relabelled

    raise AssertionError(
        'Label does not correspond with existing variable name')