def test_df_to_matrix():
    """Check the type and shape of the matrix produced by df_to_matrix.

    The module-level ``test_df`` is converted twice with a CountsScorer:
    once using the source list only, and once with ``use_stmt_type=True``.
    In each case the shape of the returned array is checked.
    """
    lr = LogisticRegression()
    source_list = ['reach', 'sparser', 'signor']

    # Sources only: one row per dataframe row, one column per source.
    cw = CountsScorer(lr, source_list)
    x_arr = cw.df_to_matrix(test_df)
    assert isinstance(x_arr, np.ndarray), 'x_arr should be a numpy array'
    assert x_arr.shape == (len(test_df), len(source_list)), \
        'stmt matrix dimensions should match test stmts'

    # Try again with statement type: the column count grows by the number
    # of entries in the scorer's stmt_type_map.
    cw = CountsScorer(lr, source_list, use_stmt_type=True)
    num_types = len(cw.stmt_type_map)
    x_arr = cw.df_to_matrix(test_df)
    assert x_arr.shape == (len(test_df), len(source_list) + num_types), \
        'matrix should have a col for sources and other cols for every ' \
        'statement type.'
def test_check_df_cols_noerr():
    """Convert the unmodified test dataframe; no ValueError is expected.

    The test passes as long as ``df_to_matrix`` returns without raising.
    """
    sources = ['reach', 'sparser', 'signor']
    scorer = CountsScorer(LogisticRegression(), sources)
    scorer.df_to_matrix(test_df)
def test_check_df_cols_err():
    """Drop a required column and make sure we get a ValueError.

    The ``stmt_type`` column is removed from a copy of the test dataframe
    before it is passed to ``df_to_matrix``. The test passes only if that
    call raises ValueError; any other outcome is a failure.
    """
    lr = LogisticRegression()
    source_list = ['reach', 'sparser', 'signor']
    cw = CountsScorer(lr, source_list)
    # DataFrame.drop returns a new frame, so the shared test_df is untouched.
    incomplete_df = test_df.drop('stmt_type', axis=1)
    try:
        cw.df_to_matrix(incomplete_df)
    except ValueError:
        # Expected: the missing column was detected.
        return
    raise AssertionError(
        'df_to_matrix should raise ValueError when a required column '
        'is missing')