Exemplo n.º 1
0
    def test_liveness(self):
        """Check which tables remain retainable once a liveness scope exits.

        Asserts that a table created outside the scope and a table handed to
        ``preserve`` can still be retained, while a table created inside the
        scope without being preserved cannot.
        """
        outside_table = create_table()
        with liveness_scope() as scope:
            discarded_table = create_table()
            _ = to_pandas(discarded_table)
            preserved_table = create_table()
            _ = to_pandas(preserved_table)
            scope.preserve(preserved_table)

        # Same check order as before: unmanaged table, preserved table, then
        # the table that was neither.
        for surviving_table in (outside_table, preserved_table):
            self.assertTrue(surviving_table.j_table.tryRetainReference())
        self.assertFalse(discarded_table.j_table.tryRetainReference())
Exemplo n.º 2
0
 def test_to_pandas(self):
     """Convert ``self.test_table`` to pandas and compare column metadata.

     Asserts the column count, a total size of two values per column, and
     that each resulting series matches its table column in numpy dtype
     and name.
     """
     frame = to_pandas(self.test_table)
     table_columns = self.test_table.columns
     self.assertEqual(len(frame.columns), len(table_columns))
     self.assertEqual(frame.size, 2 * len(table_columns))

     # Pair each table column with the series at the same position.
     series_in_order = [frame[label] for label in frame.columns]
     for column, series in zip(table_columns, series_in_order):
         with self.subTest(column):
             self.assertEqual(column.data_type.np_type, series.dtype)
             self.assertEqual(column.name, series.name)
Exemplo n.º 3
0
 def test_to_table_boolean_with_none(self):
     """Round-trip a null-containing boolean column through pandas.

     The ``Boolean`` column is first rewritten by an update formula
     (``NULL_BYTE`` for null, otherwise 1 or 0); the rewritten table is
     then converted to a DataFrame and back and asserted equal to itself.
     """
     source_table = new_table(
         cols=[bool_col(name="Boolean", data=[True, None])])
     remap_formula = (
         "Boolean = isNull(Boolean) ? NULL_BYTE : (Boolean == true ? 1: 0)"
     )
     prepared_table = source_table.update(formulas=[remap_formula])

     round_tripped = to_table(to_pandas(prepared_table))
     self.assert_table_equals(round_tripped, prepared_table)
Exemplo n.º 4
0
    def test_to_pandas_remaps(self):
        """Check the dtypes ``to_pandas`` produces for a column subset.

        ``Long`` is derived from ``Long_`` with nulls replaced by
        ``Double.NaN`` and is asserted to arrive as ``np.float64``;
        ``Boolean`` is asserted to arrive with ``np.bool_`` values.
        """
        prepared_table = self.test_table.update(
            formulas=["Long = isNull(Long_) ? Double.NaN : Long_"])

        df = to_pandas(prepared_table, cols=["Boolean", "Long"])
        self.assertEqual(df['Long'].dtype, np.float64)
        self.assertEqual(df['Boolean'].values.dtype, np.bool_)
        # NOTE(review): only dtypes are verified here. A value-level check has
        # to be wrapped in an assertion, e.g.
        # ``self.assertTrue(expected.equals(df))`` with ``expected`` carrying
        # the "Boolean"/"Long" column labels; a bare ``DataFrame.equals`` call
        # discards its boolean result and verifies nothing.
Exemplo n.º 5
0
    def test_liveness_nested(self):
        """Check table retention across an outer and a nested liveness scope.

        Asserts that the table preserved by the nested scope is retainable
        after the nested scope exits but no longer after the outer scope
        exits, that the table preserved by the outer scope is retainable after
        the outer scope exits, and that the unpreserved tables are not
        retainable once the scope that created them has exited.
        """
        with liveness_scope() as l_scope:
            to_discard = create_table()
            df = to_pandas(to_discard)
            must_keep = create_table()
            df = to_pandas(must_keep)
            l_scope.preserve(must_keep)

            with liveness_scope() as nested_l_scope:
                nested_to_discard = create_table()
                df = to_pandas(nested_to_discard)
                nested_must_keep = create_table()
                df = to_pandas(nested_must_keep)
                nested_l_scope.preserve(nested_must_keep)
            # Nested scope has exited; the outer scope is still open.
            self.assertTrue(nested_must_keep.j_table.tryRetainReference())
            # drop the extra reference obtained by the tryRetainReference() call in the above assert
            nested_must_keep.j_table.dropReference()
            self.assertFalse(nested_to_discard.j_table.tryRetainReference())

        # Outer scope has exited: only the table it preserved is expected to
        # be retainable.
        self.assertTrue(must_keep.j_table.tryRetainReference())
        self.assertFalse(to_discard.j_table.tryRetainReference())
        self.assertFalse(nested_must_keep.j_table.tryRetainReference())
        self.assertFalse(nested_to_discard.j_table.tryRetainReference())
Exemplo n.º 6
0
    def test_vector_column(self):
        """Convert a grouped table to pandas and inspect the grouped values.

        Asserts that both columns arrive with ``np.object_`` dtype and that
        the ``Doubles`` entries, read through ``toArray()``, hold the values
        of each ``String`` group in order.
        """
        source_table = new_table([
            string_col("String", ["Str1", "Str1", "Str2", "Str2", "Str2"]),
            double_col("Doubles", [1.0, 2.0, 4.0, 8.0, 16.0]),
        ])

        grouped_table = source_table.group_by(["String"])
        frame = to_pandas(grouped_table, cols=["String", "Doubles"])
        for label in ("String", "Doubles"):
            self.assertEqual(frame[label].dtype, np.object_)

        grouped_doubles = frame['Doubles']
        expected_per_group = ([1.0, 2.0], [4.0, 8.0, 16.0])
        for position, expected in enumerate(expected_per_group):
            self.assertEqual(
                expected, list(grouped_doubles[position].toArray()))
Exemplo n.º 7
0
 def test_to_table(self):
     """Round-trip a table with one column per primitive type via pandas.

     Builds the table, converts it to a DataFrame and back, and asserts
     the result equals the starting table.
     """
     source_table = new_table(cols=[
         bool_col(name="Boolean", data=[True, False]),
         byte_col(name="Byte", data=(1, -1)),
         char_col(name="Char", data='-1'),
         short_col(name="Short", data=[1, -1]),
         int_col(name="Int", data=[1, -1]),
         long_col(name="Long", data=[1, NULL_LONG]),
         long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
         float_col(name="Float", data=[1.01, -1.01]),
         double_col(name="Double", data=[1.01, -1.01]),
     ])

     round_tripped = to_table(to_pandas(source_table))
     self.assert_table_equals(round_tripped, source_table)
Exemplo n.º 8
0
    def test_to_table_datetime_with_none(self):
        """Round-trip a datetime column containing ``None`` through pandas.

        The column holds ``dtypes.DateTime(1)``, ``None`` and two values
        parsed from strings ending in ``NY`` and ``HI``; the table rebuilt
        from the DataFrame is asserted equal to the starting table.
        """
        ny_value = to_datetime("2021-12-10T23:59:59 NY")
        hi_value = to_datetime("2021-12-10T23:59:59 HI")

        datetime_values = [dtypes.DateTime(1), None, ny_value, hi_value]
        source_table = new_table(
            cols=[datetime_col(name="Datetime", data=datetime_values)])

        round_tripped = to_table(to_pandas(source_table))
        self.assert_table_equals(round_tripped, source_table)
Exemplo n.º 9
0
    def base_test(self, source, model, np_dtype):
        """Compare ``gather.table_to_numpy_2d`` output with ``to_pandas`` values.

        Columns ``X``, ``Y`` and ``Z`` of ``source`` are gathered in both
        row-major and column-major layouts; each result is checked against
        ``to_pandas(source).values`` for shape, values (``np.allclose``),
        dtype and contiguity.

        Args:
            source: table whose ``j_table`` supplies the row set and the
                column sources.
            model: not used by this method.
            np_dtype: dtype passed to the gatherer and expected of the
                gathered arrays.
        """
        row_set = source.j_table.getRowSet()
        col_sources = []
        for col_name in ["X", "Y", "Z"]:
            col_sources.append(source.j_table.getColumnSource(col_name))

        def gather_as(layout):
            # Same gather call for both layouts; only the layout differs.
            return gather.table_to_numpy_2d(
                row_set, col_sources, layout, np_dtype)

        array_from_table = to_pandas(source).values

        gathered_rowmajor = gather_as(gather.MemoryLayout.ROW_MAJOR)
        gathered_colmajor = gather_as(gather.MemoryLayout.COLUMN_MAJOR)

        with self.subTest(msg="Array shape"):
            expected_shape = array_from_table.shape
            self.assertTrue(gathered_rowmajor.shape == expected_shape)
            row_shape_msg = "Row major gathered shape: {}"
            print(row_shape_msg.format(gathered_rowmajor.shape))
            self.assertTrue(gathered_colmajor.shape == expected_shape)
            col_shape_msg = "Column major gathered shape: {}"
            print(col_shape_msg.format(gathered_colmajor.shape))

        with self.subTest(msg="Values in array"):
            rows_match = np.allclose(gathered_rowmajor, array_from_table)
            self.assertTrue(rows_match)
            print("All row-major array values are equal")
            cols_match = np.allclose(gathered_colmajor, array_from_table)
            self.assertTrue(cols_match)
            print("All column-major array values are equal")

        with self.subTest(msg="Array data type"):
            row_dtype = gathered_rowmajor.dtype
            col_dtype = gathered_colmajor.dtype
            self.assertTrue(row_dtype == np_dtype)
            self.assertTrue(row_dtype == array_from_table.dtype)
            self.assertTrue(col_dtype == np_dtype)
            self.assertTrue(col_dtype == array_from_table.dtype)
            self.assertTrue(row_dtype == col_dtype)
            print("Array dtype: {}".format(np_dtype))

        with self.subTest(msg="Contiguity"):
            # Either contiguity flag is accepted for each layout.
            row_flags = gathered_rowmajor.flags
            self.assertTrue(
                row_flags["C_CONTIGUOUS"] or row_flags["F_CONTIGUOUS"])
            col_flags = gathered_colmajor.flags
            self.assertTrue(
                col_flags["C_CONTIGUOUS"] or col_flags["F_CONTIGUOUS"])
            print("Array contiguity checked")
0
 def test_round_trip_with_nulls(self):
     """Round-trip a table whose columns contain null values via pandas.

     Asserts the DataFrame has one column per table column and two values
     per column, and that the table rebuilt from it equals the original.
     """
     # Left out on purpose, since these types have no two-way conversion:
     #   bool_col with a None entry,
     #   string_col with a None entry,
     #   jobj_col (e.g. a dtypes.ArrayList([1, -1])) with a None entry.
     source_table = new_table(cols=[
         byte_col(name="Byte", data=(1, NULL_BYTE)),
         char_col(name="Char", data='-1'),
         short_col(name="Short", data=[1, NULL_SHORT]),
         int_col(name="Int_", data=[1, NULL_INT]),
         long_col(name="Long_", data=[1, NULL_LONG]),
         float_col(name="Float_", data=[1.01, np.nan]),
         double_col(name="Double_", data=[1.01, np.nan]),
         datetime_col(name="Datetime", data=[dtypes.DateTime(1), None]),
         pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]),
     ])

     frame = to_pandas(source_table)
     column_count = len(source_table.columns)
     self.assertEqual(len(frame.columns), column_count)
     self.assertEqual(frame.size, 2 * len(source_table.columns))

     rebuilt_table = to_table(frame)
     self.assert_table_equals(rebuilt_table, source_table)
Exemplo n.º 11
0
    def test_invalid_col_name(self):
        """Expect ``DHError`` naming the bad column for an unknown column name.

        ``"boolean"`` is requested alongside ``"Long"``; the raised error's
        text is asserted to contain ``"boolean"``.
        """
        requested_cols = ["boolean", "Long"]
        with self.assertRaises(DHError) as raised:
            to_pandas(self.test_table, cols=requested_cols)

        error_text = str(raised.exception)
        self.assertIn("boolean", error_text)
Exemplo n.º 12
0
 def test_to_table_category(self):
     """Round-trip a DataFrame with a categorical column through a table.

     Column ``B`` is column ``A`` cast to ``category``; after converting
     to a table and back, the values of ``A`` and ``B`` are asserted equal.
     """
     frame = pd.DataFrame({"A": ["a", "b", "a", "d"]})
     frame["B"] = frame["A"].astype("category")

     round_tripped = to_pandas(to_table(frame))
     plain_values = round_tripped["A"].values
     category_values = round_tripped["B"].values
     self.assertTrue(np.array_equal(plain_values, category_values))