def test_rolling1(self):
    """Compare sdc.jit results with pandas for rolling sums of window 3 and 7.

    After each case, count_array_REPs() and count_parfor_REPs() are both
    asserted to be 0.
    """
    # size 3 without unroll
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
        Ac = df.A.rolling(3).sum()
        return Ac.sum()

    hpat_func = sdc.jit(test_impl)
    n = 121
    self.assertEqual(hpat_func(n), test_impl(n))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)

    # size 7 with unroll
    def test_impl_2(n):
        df = pd.DataFrame({
            'A': np.arange(n) + 1.0,
            'B': np.random.ranf(n)
        })
        Ac = df.A.rolling(7).sum()
        return Ac.sum()

    # Wrap and compare test_impl_2 itself. Wrapping test_impl here would
    # only repeat the window-3 check and leave the window-7 case untested.
    hpat_func_2 = sdc.jit(test_impl_2)
    n = 121
    self.assertEqual(hpat_func_2(n), test_impl_2(n))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_reduce_filter1(self):
    """Check reductions applied after a boolean filter, per dtype and function.

    For every (dtype, reduction) pair a function ``f`` is generated with
    ``exec``; it filters ``A`` by ``A > 5`` and calls the reduction.  The
    self.jit-wrapped version receives the ``A[start:end]`` slice from
    get_start_end(), the plain version receives the whole array, and the two
    results are compared to 3 decimals.  Both REP counters must be 0.
    """
    import sys

    running_on_windows = sys.platform.startswith('win')
    dtypes = ['float32', 'float64', 'int32', 'int64']
    funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']

    for dtype, func in itertools.product(dtypes, funcs):
        # loc allreduce doesn't support int64 on windows
        is_loc_reduction = func in ['argmin', 'argmax']
        if running_on_windows and dtype == 'int64' and is_loc_reduction:
            continue

        func_text = """def f(A):
            A = A[A>5]
            return A.{}()
        """.format(func)
        loc_vars = {}
        exec(func_text, {'np': np}, loc_vars)
        test_impl = loc_vars['f']

        decorator = self.jit(locals={'A:input': 'distributed'})
        jitted = decorator(test_impl)

        length = 21
        start, end = get_start_end(length)
        np.random.seed(0)
        A = np.random.randint(0, 10, length).astype(dtype)

        actual = jitted(A[start:end])
        expected = test_impl(A)
        np.testing.assert_almost_equal(
            actual,
            expected,
            decimal=3,
            err_msg="{} on {}".format(func, dtype),
        )
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
def test_shape(self):
    """Check ``np.ones(N).shape[0]`` under self.jit against plain NumPy.

    Also asserts count_array_REPs() and count_parfor_REPs() are both 0.
    """
    def test_impl(N):
        return np.ones(N).shape[0]

    size = 128
    jitted = self.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pq_float_no_nan(self):
    """Sum column ``four`` of 'example.parquet' with and without self.jit.

    The two sums must agree (assert_almost_equal) and both REP counters
    must be 0.
    """
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return df.four.sum()

    jitted = self.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_astype(self):
    """Check ``np.ones(N).astype(np.int32).sum()`` under sdc.jit.

    The wrapped result must match plain NumPy, and both REP counters
    must be 0.
    """
    def test_impl(N):
        return np.ones(N).astype(np.int32).sum()

    size = 128
    jitted = sdc.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pq_read_global_str1(self):
    """Read the parquet file named by the outer ``kde_file`` and sum 'points'.

    The self.jit-wrapped sum must match pandas (assert_almost_equal), and
    both REP counters must be 0.
    """
    def test_impl():
        df = pd.read_parquet(kde_file)
        X = df['points']
        return X.sum()

    jitted = self.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_np_io2(self):
    """Sum the float64 contents of 'np_file1.dat' read via np.fromfile.

    The self.jit-wrapped sum must match plain NumPy, and both REP counters
    must be 0.
    """
    # parallel version
    def test_impl():
        A = np.fromfile("np_file1.dat", np.float64)
        return A.sum()

    jitted = self.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pq_str_with_nan_par_multigroup(self):
    """Count entries of column ``five`` in 'example2.parquet' equal to 'foo'.

    The self.jit-wrapped count must match the plain-Python count, and both
    REP counters must be 0.
    """
    def test_impl():
        df = pq.read_table('example2.parquet').to_pandas()
        A = df.five.values == 'foo'
        return A.sum()

    jitted = self.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pq_str(self):
    """Count entries of column ``two`` in 'example.parquet' equal to 'foo'.

    The sdc.jit-wrapped count must match the plain-Python count, and both
    REP counters must be 0.
    """
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        A = df.two.values == 'foo'
        return A.sum()

    jitted = sdc.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pd_read_parquet(self):
    """Sum column 'points' of 'kde.parquet' loaded with pd.read_parquet.

    The sdc.jit-wrapped sum must match pandas (assert_almost_equal), and
    both REP counters must be 0.
    """
    def test_impl():
        df = pd.read_parquet('kde.parquet')
        X = df['points']
        return X.sum()

    jitted = sdc.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_agg_parallel_str(self):
    """Group 'groupby3.pq' by 'A', aggregate 'B' as max - min, and sum.

    The self.jit-wrapped result must equal the pandas result exactly, and
    both REP counters must be 0.
    """
    def test_impl():
        df = pq.read_table("groupby3.pq").to_pandas()
        A = df.groupby('A')['B'].agg(lambda x: x.max() - x.min())
        return A.sum()

    jitted = self.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_df_values_parallel1(self):
    """Check ``df.values.sum()`` for a two-column frame under sdc.jit.

    The wrapped result must match pandas (assert_array_equal), and both
    REP counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
        return df.values.sum()

    rows = 11
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    np.testing.assert_array_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_quantile_parallel_int(self):
    """Check the 0.25 quantile of an int32 ``arange`` column under sdc.jit.

    The wrapped result must match pandas (assert_almost_equal), and both
    REP counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(0, n, 1, np.int32)})
        return df.A.quantile(.25)

    rows = 1001
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_whole_slice(self):
    """Check assignment to a full column slice ``X[:, 3]`` under self.jit.

    Column 3 of an (N, 4) array of ones is divided by its own max - min and
    the array is summed.  The wrapped result must match plain NumPy
    (assert_allclose), and both REP counters must be 0.
    """
    def test_impl(N):
        X = np.ones((N, 4))
        X[:, 3] = (X[:, 3]) / (np.max(X[:, 3]) - np.min(X[:, 3]))
        return X.sum()

    size = 128
    jitted = self.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_setitem1(self):
    """Check a scalar ``A[0] = 30`` assignment followed by a sum.

    ``test_impl`` accepts ``N`` but does not use it; the array length is
    fixed at 10.  The self.jit-wrapped result must match plain NumPy, and
    both REP counters must be 0.
    """
    def test_impl(N):
        A = np.arange(10) + 1.0
        A[0] = 30
        return A.sum()

    size = 128
    jitted = self.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pq_read(self):
    """Sum column 'points' of 'kde.parquet' loaded via pq.read_table.

    The self.jit-wrapped sum must match the plain-Python sum
    (assert_almost_equal), and both REP counters must be 0.
    """
    def test_impl():
        t = pq.read_table('kde.parquet')
        df = t.to_pandas()
        X = df['points']
        return X.sum()

    jitted = self.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_strided_getitem(self):
    """Check summing the strided slice ``A[::7]`` of ``np.ones(N)``.

    The self.jit-wrapped result must match plain NumPy (assert_allclose),
    and both REP counters must be 0.
    """
    def test_impl(N):
        A = np.ones(N)
        B = A[::7]
        return B.sum()

    size = 128
    jitted = self.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_shift2(self):
    """Check the sum of ``df.A.pct_change(1)`` under self.jit.

    The wrapped result must match pandas (assert_almost_equal), and both
    REP counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)})
        Ac = df.A.pct_change(1)
        return Ac.sum()

    rows = 11
    jitted = self.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_agg_parallel_std(self):
    """Check the sum of ``groupby('A')['B'].std()`` under self.jit.

    The wrapped result must equal the pandas result exactly, and both REP
    counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n)})
        A = df.groupby('A')['B'].std()
        return A.sum()

    rows = 11
    jitted = self.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_filter3(self):
    """Check ``df.iloc`` with a boolean-array filter, then ``np.sum`` of 'B'.

    The sdc.jit-wrapped result must equal the pandas result exactly, and
    both REP counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
        df1 = df.iloc[(df.A > .5).values]
        return np.sum(df1.B)

    rows = 11
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_agg_parallel_as_index(self):
    """Check ``groupby('A', as_index=False).max()`` and the sum of column A.

    The self.jit-wrapped result must equal the pandas result exactly, and
    both REP counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n)})
        df2 = df.groupby('A', as_index=False).max()
        return df2.A.sum()

    rows = 11
    jitted = self.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_concat_series_str(self):
    """Concatenate column ``two`` from two reads of 'example.parquet'.

    The number of entries equal to 'foo' in the concatenated series is
    compared between the sdc.jit-wrapped and plain versions; both REP
    counters must be 0.
    """
    def test_impl():
        df1 = pq.read_table('example.parquet').to_pandas()
        df2 = pq.read_table('example.parquet').to_pandas()
        A3 = pd.concat([df1.two, df2.two])
        return (A3 == 'foo').sum()

    jitted = sdc.jit(test_impl)

    actual = jitted()
    expected = test_impl()
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_rolling3(self):
    """Check a centered window-3 ``rolling().apply`` with a weighted lambda.

    The lambda computes ``a[0] + 2 * a[1] + a[2]``.  The self.jit-wrapped
    sum must equal the pandas result exactly, and both REP counters must
    be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        Ac = df.A.rolling(3, center=True).apply(lambda a: a[0] + 2 * a[1] + a[2])
        return Ac.sum()

    rows = 121
    jitted = self.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_1D_Var_len(self):
    """Check ``len`` of a column taken from a frame filtered by ``df.A > 5``.

    The sdc.jit-wrapped length must equal the pandas length, and both REP
    counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n) + 1.0})
        df1 = df[df.A > 5]
        return len(df1.B)

    rows = 11
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_rolling2(self):
    """Check a centered window-5 rolling mean stored as a new column.

    The mean is written to column 'moving average' and then summed.  The
    sdc.jit-wrapped sum must equal the pandas result exactly, and both REP
    counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        df['moving average'] = df.A.rolling(window=5, center=True).mean()
        return df['moving average'].sum()

    rows = 121
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_describe(self):
    """Run ``df.A.describe()`` under sdc.jit and check the REP counters.

    The returned value is not compared with pandas; only
    count_array_REPs() and count_parfor_REPs() are asserted to be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(0, n, 1, np.float64)})
        return df.A.describe()

    rows = 1001
    jitted = sdc.jit(test_impl)

    jitted(rows)
    # XXX: test actual output
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_getitem_multidim(self):
    """Check ``A[B, 2]`` where B is a boolean mask over the first axis.

    The self.jit-wrapped sum of the selection must match plain NumPy
    (assert_allclose), and both REP counters must be 0.
    """
    def test_impl(N):
        A = np.ones((N, 3))
        B = np.ones(N) > .5
        C = A[B, 2]
        return C.sum()

    size = 128
    jitted = self.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_transpose(self):
    """Check ``transpose`` called with a tuple and with separate axis args.

    ``test_impl`` accepts ``n`` but does not use it; the array shape is
    fixed at (30, 40, 50).  The self.jit-wrapped result must match plain
    NumPy (assert_allclose), and both REP counters must be 0.
    """
    def test_impl(n):
        A = np.ones((30, 40, 50))
        B = A.transpose((0, 2, 1))
        C = A.transpose(0, 2, 1)
        return B.sum() + C.sum()

    size = 128
    jitted = self.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_quantile_parallel_float_nan(self):
    """Check the 0.25 quantile of a float32 column with NaN ranges.

    Rows 0:100 and 200:331 of column A are set to NaN before the quantile
    is taken.  The sdc.jit-wrapped result must match pandas
    (assert_almost_equal), and both REP counters must be 0.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(0, n, 1, np.float32)})
        df.A[0:100] = np.nan
        df.A[200:331] = np.nan
        return df.A.quantile(.25)

    rows = 1001
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    np.testing.assert_almost_equal(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_column_getitem1(self):
    """Check summing ``df['A'].values`` under sdc.jit.

    The wrapped result must equal the pandas result exactly.  Afterwards
    count_array_REPs() and count_parfor_REPs() must be 0 and
    count_parfor_OneDs() must be 1.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        Ac = df['A'].values
        return Ac.sum()

    rows = 11
    jitted = sdc.jit(test_impl)

    actual = jitted(rows)
    expected = test_impl(rows)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)