def test_integer_array(self):
    """Check integer Java arrays built from a numpy double array and from Python lists."""
    doubles = np.array([float('nan'), NULL_DOUBLE, 1.123, np.inf], dtype=np.float64)
    null_by_dtype = {
        dtypes.int64: NULL_LONG,
        dtypes.int32: NULL_INT,
        dtypes.short: NULL_SHORT,
        dtypes.byte: NULL_BYTE,
    }

    for dt, nv in null_by_dtype.items():
        map_fn = functools.partial(remap_double, null_value=nv)
        with self.subTest(f"numpy double array to {dt}"):
            # NaN, NULL_DOUBLE and inf are expected to come back as the null value,
            # and 1.123 as the integer 1.
            j_array = dtypes.array(dt, doubles, remap=map_fn)
            self.assertEqual([nv, nv, 1, nv], list(j_array))

    with self.subTest("int array from Python list"):
        j_array = dtypes.array(dtypes.int32, [1.1, 2.2, 3.3])
        self.assertIn("[I", str(type(j_array)))
        self.assertEqual([1, 2, 3], list(j_array))

    with self.subTest("byte array from Python list, down cast"):
        too_big = [1000, 2000, 3000]
        j_array = dtypes.array(dtypes.byte, too_big)
        self.assertIn("[B", str(type(j_array)))
        # The round-tripped values are asserted to differ from the inputs.
        self.assertNotEqual(too_big, list(j_array))
def test_char_array(self):
    """Check char Java arrays built from a string and from a mixed list with a remap function."""

    def remap_char(v):
        """Map a Python value to a char code, or NULL_CHAR when it has none."""
        if v is None:
            return NULL_CHAR
        if isinstance(v, int):
            return v
        if isinstance(v, str):
            # An empty string has no first character to take.
            return ord(v[0]) if v else NULL_CHAR
        try:
            return int(v)
        except (TypeError, ValueError, OverflowError):
            # Only the failures int() itself raises; a bare except would also
            # swallow KeyboardInterrupt and SystemExit.
            return NULL_CHAR

    test_str = "abcdefg0123456"
    j_array = dtypes.array(dtypes.char, test_str)
    self.assertIn("[C", str(type(j_array)))
    py_array = [chr(x) for x in j_array]
    self.assertEqual(test_str, "".join(py_array))

    test_list = [None, "abc", {}, 69]
    expected = [NULL_CHAR, ord("a"), NULL_CHAR, ord("E")]
    j_array = dtypes.array(dtypes.char, test_list, remap=remap_char)
    py_array = list(j_array)
    self.assertIn("[C", str(type(j_array)))
    self.assertEqual(expected, py_array)
def test_array(self):
    """Check that arrays built with dtypes.int_ and dtypes.int64 read back through np.frombuffer as int64."""
    expected = np.array([0, 1, 2, 3, 4], dtype=np.int64)

    from_range = dtypes.array(dtypes.int_, range(5))
    viewed = np.frombuffer(from_range, np.int64)
    self.assertTrue(np.array_equal(viewed, expected))

    from_list = dtypes.array(dtypes.int64, [0, 1, 2, 3, 4])
    viewed = np.frombuffer(from_list, dtype=np.int64)
    self.assertTrue(np.array_equal(viewed, expected))
def __post_init__(self):
    """Populate self.j_column from the column's name, data type and input data.

    With no input data an empty column is created from the column header.
    Primitive data types go through _JColumn.ofUnsafe with the column name;
    all other types go through _JColumn.of with the column header.

    Raises:
        DHError
    """
    try:
        if self.input_data is None:
            j_col = _JColumn.empty(self.j_column_header)
        elif self.data_type.is_primitive:
            j_col = _JColumn.ofUnsafe(
                self.name, dtypes.array(self.data_type, self.input_data))
        else:
            j_col = _JColumn.of(
                self.j_column_header, dtypes.array(self.data_type, self.input_data))
        self.j_column = j_col
    except Exception as err:
        raise DHError(err, "failed to create an InputColumn.") from err
def j_partitions(partitions):
    """Translate a partitions argument into a partition filter.

    Args:
        partitions: None, or a sequence of partition numbers

    Returns:
        ALL_PARTITIONS when partitions is None; otherwise the result of
        _JKafkaTools.partitionFilterFromArray applied to an int32 Java array
        built from partitions
    """
    if partitions is None:
        return ALL_PARTITIONS
    return _JKafkaTools.partitionFilterFromArray(dtypes.array(dtypes.int32, partitions))
def test_dtw_with_array_types(self):
    """Write one row of Java array values and check that no cell renders as null."""
    with self.subTest("Array type columns"):
        col_defs = {
            "ByteArray": dtypes.byte_array,
            "ShortArray": dtypes.short_array,
            "Int32Array": dtypes.int32_array,
            "LongArray": dtypes.long_array,
            "Float32Array": dtypes.float32_array,
            "DoubleArray": dtypes.double_array,
            "StringArray": dtypes.string_array,
        }
        with DynamicTableWriter(col_defs) as table_writer:
            # One (element type, values) pair per column, in column order.
            row_spec = [
                (dtypes.byte, [1, 1, 1]),
                (dtypes.short, [128, 228, 328]),
                (dtypes.int32, [32768, 42768, 52768]),
                (dtypes.long, [2**32, 2**33, 2**36]),
                (dtypes.float32, [1.0, 1.1, 1.2]),
                (dtypes.double, [1.0 / 2**32, 1.1 / 2**33, 1.2 / 2**36]),
                (dtypes.string, ["some", "not so random", "text"]),
            ]
            row = [dtypes.array(elem_type, values) for elem_type, values in row_spec]
            table_writer.write_row(*row)

            t = table_writer.table
            self.wait_ticking_table_update(t, row_count=1, timeout=5)
            self.assertNotIn("null", t.to_string())
def _make_input_column(col: str, np_array: np.ndarray) -> InputColumn:
    """Build an InputColumn from a column name and a numpy array.

    Boolean data is cast to int8, copied into a Java byte array and translated
    with translateArrayByteToBoolean. DateTime data is cast to datetime64[ns],
    then int64, copied into a Java long array and translated with
    translateArrayLongToDateTime. Any other data is passed through unchanged.
    """
    dtype = dtypes.from_np_dtype(np_array.dtype)
    data = np_array

    if dtype == dtypes.bool_:
        j_bytes = dtypes.array(dtypes.byte, data.astype(dtype=np.int8))
        data = _JPrimitiveArrayConversionUtility.translateArrayByteToBoolean(j_bytes)

    if dtype == dtypes.DateTime:
        j_longs = jpy.array('long', data.astype('datetime64[ns]').astype('int64'))
        data = _JPrimitiveArrayConversionUtility.translateArrayLongToDateTime(j_longs)

    return InputColumn(name=_to_column_name(col), data_type=dtype, input_data=data)
def test_floating_array(self):
    """Check float and double Java arrays built from numpy arrays, a pandas Series and a Python list.

    Every element of each expectation is compared, and lengths are asserted
    first so a short result cannot pass by truncation.
    """
    nulls = {dtypes.float_: NULL_FLOAT, dtypes.double: NULL_DOUBLE}

    np_array = np.array([float('nan'), 1.7976931348623157e+300, NULL_DOUBLE, 1.1, float('inf')], dtype=np.float64)
    for dt, nv in nulls.items():
        map_fn = functools.partial(remap_double, null_value=nv)
        with self.subTest(f"numpy double array to {dt} with mapping"):
            expected = [nv, 1.7976931348623157e+300, nv, 1.1, nv]
            j_array = dtypes.array(dt, np_array, remap=map_fn)
            py_array = list(j_array)
            self.assertEqual(len(expected), len(py_array))
            for want, got in zip(expected, py_array):
                # downcast from double to float results in inf when the value is outside of float range
                self.assertTrue(math.isclose(want, got, rel_tol=1e-7) or got == float('inf'))

    with self.subTest("double array from numpy array"):
        # The array is wrapped in a pandas Series before conversion.
        np_doubles = np.array([float('nan'), NULL_DOUBLE, 1.1, float('inf')], dtype=np.float64)
        pd_series = pd.Series(np_doubles)
        j_array = dtypes.array(dtypes.double, pd_series)
        py_array = list(j_array)
        expected = [float('nan'), NULL_DOUBLE, 1.1, float('inf')]
        # NaN never compares equal, so it is checked separately from the rest.
        self.assertTrue(math.isnan(py_array[0]))
        self.assertEqual(expected[1:], py_array[1:])

    with self.subTest("double array from numpy long array"):
        expected = [NULL_LONG, 1, 2, 3]
        np_longs = np.array(expected, dtype=np.int64)
        j_array = dtypes.array(dtypes.float64, np_longs)
        self.assertIn("[D", str(type(j_array)))
        py_array = list(j_array)
        self.assertEqual(len(expected), len(py_array))
        for want, got in zip(expected, py_array):
            self.assertAlmostEqual(want, got)

    with self.subTest("double array from Python list of integer"):
        expected = [NULL_LONG, 1, 2, 3]
        j_array = dtypes.array(dtypes.float64, expected)
        py_array = list(j_array)
        self.assertEqual(len(expected), len(py_array))
        for want, got in zip(expected, py_array):
            self.assertAlmostEqual(want, got)
def _convert_j(name: str, obj: Any, types: List) -> Any: """Convert the input object into a Java object that can be used for plotting. Args: name (str): name of the variable being converted to Java obj (Any): object being converted to Java types (List): acceptable types for the object Raises: DHError """ if obj is None: return None _assert_type(name, obj, types) if isinstance(obj, numbers.Number): return obj elif isinstance(obj, str): return obj elif isinstance(obj, bool): return obj elif isinstance(obj, JObjectWrapper): return obj.j_object elif isinstance(obj, Enum): if isinstance(obj.value, JObjectWrapper): return obj.value.j_object else: return obj.value elif isinstance(obj, Sequence): # to avoid JPY's 'too many matching overloads' error np_array = numpy.array(obj) dtype = dtypes.from_np_dtype(np_array.dtype) return dtypes.array(dtype, np_array) elif isinstance(obj, Callable): return j_function(obj, dtypes.PyObject) else: raise DHError( message=f"Unsupported input type: name={name} type={type(obj)}")
def test_datetime(self):
    """Check that DateTime values and None compare equal after a round trip through a Java array."""
    values = [DateTime(round(time.time())), now(), None]
    j_array = dtypes.array(DateTime, values)
    self.assertTrue(all(j_val == py_val for j_val, py_val in zip(j_array, values)))