def test_converting_to_factors():
    """Yield checks that only the requested columns are turned into R factors.

    A four-column frame is built: two boolean columns rendered as the
    strings 'True'/'False' (colA, colD) and two float columns (colB, colC).
    After passing it through ``Rtools.convert_columns_to_factors`` the test
    yields, per column, either an ``eq_`` check that ``nlevels`` is 2
    (factor columns) or an ``ok_`` check that there is no ``nlevels``
    attribute at all (numeric columns).
    """
    frame = DataFrame(
        {
            'colA': Series(randn(1, 5000).flatten() > 0),
            'colB': Series(100 * randn(1, 5000).flatten()),
            'colC': Series(100 + randn(1, 5000).flatten()),
            'colD': Series(randn(1, 5000).flatten() > 0),
        },
    )

    # The boolean columns become 'True'/'False' strings before conversion.
    for name in ('colA', 'colD'):
        frame[name] = frame[name].map(str)

    factor_cols = [('colA', 'True'), ('colD', 'True')]

    r_frame = com.convert_to_r_dataframe(frame)
    converted = Rtools.convert_columns_to_factors(r_frame, factor_cols)

    expected_kinds = [
        ('colA', 'factor'),
        ('colB', 'numeric'),
        ('colC', 'numeric'),
        ('colD', 'factor'),
    ]
    for name, kind in expected_kinds:
        if kind == 'numeric':
            yield ok_, (not hasattr(converted.rx2(name), 'nlevels'))
        elif kind == 'factor':
            yield eq_, converted.rx2(name).nlevels, 2
def test_mixed_model():
    """Fit a linear mixed-effects model through Rtools and check its tTable.

    Builds a frame with two string-valued 'True'/'False' columns (colA,
    colD) and two float columns (colB, colC), converts it to an R data
    frame with colA/colD as factors, and fits ``colC ~ as.factor(colA) +
    colB`` with the random-effects formula ``~1|colD``.  The result must
    contain a 'tTable' entry whose index includes both the factor term and
    the numeric covariate.
    """
    test_data = DataFrame(
        {
            'colA': Series(randn(1, 5000).flatten() > 0),
            'colB': Series(100 * randn(1, 5000).flatten()),
            'colC': Series(100 + randn(1, 5000).flatten()),
            'colD': Series(randn(1, 5000).flatten() > 0),
        },
    )
    test_data['colA'] = test_data['colA'].map(str)
    test_data['colD'] = test_data['colD'].map(str)

    factor_cols = [('colA', 'True'), ('colD', 'True')]

    rpy_test_df = com.convert_to_r_dataframe(test_data)
    rpy_test_df = Rtools.convert_columns_to_factors(rpy_test_df, factor_cols)

    base_formula = Formula('colC ~ as.factor(colA) + colB')
    rand_formula = Formula('~1|colD')

    results = Rtools.R_linear_mixed_effects_model(
        rpy_test_df, base_formula, rand_formula)

    # Check for the table before touching it, so a missing entry is reported
    # through the assertion message rather than a lookup error on the print.
    ok_(('tTable' in results), 'Did not have the tTable in the results')

    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3.
    print(results['tTable'])

    ok_(('as.factor(colA)False' in results['tTable'].index),
        'Did not have the factor in the tTable')
    ok_(('colB' in results['tTable'].index),
        'Did not have the variable in the tTable')
def test_quantile_normalize():
    """Yield checks on the output of ``Rtools.quantile_norm_with_R``.

    Three float columns with different scale/offset are normalized.  The
    test yields ``ok_`` checks that the index and the columns of the result
    match the input, followed by one check per ordered pair of column means
    (self-pairs included) that the two means differ by less than 0.01.
    """
    raw = DataFrame(
        {
            'colA': Series(randn(1, 5000).flatten()),
            'colB': Series(100 * randn(1, 5000).flatten()),
            'colC': Series(100 + randn(1, 5000).flatten()),
        },
    )
    # Use string labels for the rows.
    raw.index = Index([str(i) for i in range(5000)])

    normed = Rtools.quantile_norm_with_R(raw)

    yield ok_, (normed.index == raw.index).all()
    yield ok_, (normed.columns == raw.columns).all()

    col_means = normed.mean(axis=0).values
    for left in col_means:
        for right in col_means:
            yield ok_, abs(left - right) < 0.01