def testCreateTableFromData(self):
    """
    Test suite for createTableFromData method.

    Builds parallel column data as both a list-of-lists and a dict keyed by
    column name, converts each to a table, and prints the HTML rendering.
    Smoke test only: no assertions beyond successful conversion.
    """

    data_names = [
        'intList', 'floatList', 'charList', 'stringList', 'booleanList',
        'timeList'
    ]
    # Each column includes a trailing None to exercise null handling
    # (timeList is all non-null datetimes).
    data_list = [[1, 2, None], [1., 2., None], ['A', 'B', None],
                 [u'one', u'two', None], [True, False, None],
                 [datetime.utcnow(), datetime.utcnow(), datetime.utcnow()]]

    with self.subTest(msg="createTableFromData with lists"):
        tab = createTableFromData(data_list, columns=data_names)
        print("tableFromList = {}\n".format(TableTools.html(tab)))

    # Re-shape the same data as {column name: column values}.
    data_dict = {}
    for nm, da in zip(data_names, data_list):
        data_dict[nm] = da

    with self.subTest(msg="createTableFromData with dict"):
        tab = createTableFromData(data_dict, columns=data_names)
        print("tableFromDict = {}\n".format(TableTools.html(tab)))
def test_pyobj_field_access(self):
    """Fields of a Python object (pyobj) should be usable inside query strings."""
    base = TableTools.emptyTable(10)
    updated = base.update(
        "SYM = `AAPL-` + (String)pyobj.name",
        "PRICE = i * 1000")
    filtered = updated.where("PRICE > (int)pyobj.price + 100")
    rendered = TableTools.html(filtered)
    for expected in ("AAPL-GOOG", "2000"):
        self.assertIn(expected, rendered)
def testListColumnVersion(self):
    """
    Test for behavior when one of the data frame columns contains tuples or lists.
    Smoke test: converts each frame to a table and prints both renderings.
    """

    # Frame whose columns AND rows are keyed by tuples (MultiIndex-style input).
    tuple_keyed = {
        ('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
        ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
        ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
        ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
        ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}
    }
    frame_one = pandas.DataFrame(tuple_keyed)
    converted_one = dataFrameToTable(frame_one)
    print("dataframe1 = \n{}".format(frame_one))
    print("table1 = {}\n".format(TableTools.html(converted_one)))

    # Frame whose cells are tuples of varying length.
    tuple_cells = {
        'one': [(1, 2), (2, 3), (3, ), (4, 5, 6, 7)],
        'two': [(4, 5), (6, 5, 3), (7, 6), (8, 7)],
        'thing': [None, None, None, None]
    }
    frame_two = pandas.DataFrame(tuple_cells)
    converted_two = dataFrameToTable(frame_two, convertUnknownToString=True)
    print("dataframe2 = \n{}".format(frame_two))
    print("table2 = {}\n".format(TableTools.html(converted_two)))

    # Frame whose cells are lists (again of varying length).
    list_cells = {
        'one': [[1, 2], [2, 3], [3, 4], [4, 5, 6, 7]],
        'two': [[4, 5], [6, 5], [7, 6], [8, 7]],
        'thing': [None, None, None, None]
    }
    frame_three = pandas.DataFrame(list_cells)
    converted_three = dataFrameToTable(frame_three, convertUnknownToString=True)
    print("dataframe3 = \n{}".format(frame_three))
    print("table3 = {}\n".format(TableTools.html(converted_three)))
def testAggMethods(self):
    """Coverage test exercising each Aggregation factory method through aggBy."""

    # create a silly table
    source = TableTools.emptyTable(10)
    source = source.update("dumb=(int)(i/5)", "var=(int)i",
                           "weights=(double)1.0/(i+1)")

    # try the various aggregate methods - just a coverage test
    aggregations = _JArrayList()
    for aggregation in (
            Aggregation.AggGroup("aggGroup=var"),
            Aggregation.AggAvg("aggAvg=var"),
            Aggregation.AggCount("aggCount"),
            Aggregation.AggFirst("aggFirst=var"),
            Aggregation.AggLast("aggLast=var"),
            Aggregation.AggMax("aggMax=var"),
            Aggregation.AggMed("aggMed=var"),
            Aggregation.AggMin("aggMin=var"),
            Aggregation.AggPct(0.20, "aggPct=var"),
            Aggregation.AggStd("aggStd=var"),
            Aggregation.AggSum("aggSum=var"),
            Aggregation.AggAbsSum("aggAbsSum=var"),
            Aggregation.AggVar("aggVar=var"),
            Aggregation.AggWAvg("var", "weights")):
        aggregations.add(aggregation)
    source.aggBy(aggregations, "dumb")
    # TODO: AggFormula - this is terrible
    del source
def setUpClass(cls):
    """
    Inherited method allowing initialization of test environment.

    Populates cls.table with 200 rows of timestamp/symbol/price sample data.
    """
    # NB: setUpClass receives the class object as its first argument; it is
    # named `cls` (not `self`) for correctness and for consistency with the
    # other setUpClass implementations in this file.
    cls.table = TableTools.emptyTable(200).update(
        "timestamp=new DateTime((long)(i/2)*1000000000)",
        "Sym=((i%2 == 0) ? `MSFT` : `AAPL`)",
        "price=(double)((i%2 == 0) ? 100.0 + (i/2) + 5*Math.random() : 250.0 + (i/2) + 10*Math.random())"
    )
def testTableToDataframeNoNulls(self):
    """
    Test for converting a basic table with no null values to a dataframe.

    Verifies that all three convertNulls modes succeed, produce equal frames,
    and map each column to the expected numpy dtype and value.
    """

    tab_reg = TableTools.emptyTable(1).update(
        "boolCol=(boolean)false", "byteCol=(byte)0", "shortCol=(short)0",
        "intCol=(int)0", "longCol=(long)0", "floatCol=(float)0",
        "doubleCol=(double)0", "datetimeCol=new DateTime(0)",
        "stringCol=`test`")

    # there are no nulls here, so all three conversion options should work,
    # and result in identical dataframes
    with self.subTest(msg="convert null when no null values"):
        df = tableToDataFrame(tab_reg, convertNulls='ERROR', categoricals=None)
        df_reg = tableToDataFrame(tab_reg, convertNulls='PASS', categoricals=None)
        df_reg_nc = tableToDataFrame(tab_reg, convertNulls='CONVERT', categoricals=None)

    # EQUALITY CHECK
    with self.subTest(msg='converted dfs are equal'):
        self.assertTrue(df.equals(df_reg))  # equals is transitive
        self.assertTrue(df_reg.equals(df_reg_nc))

    # DATA TYPE TEST
    # BUGFIX: the deprecated alias numpy.object was removed in NumPy 1.24;
    # the builtin `object` is the documented equivalent.
    for col, dtyp in [('boolCol', numpy.bool_), ('byteCol', numpy.int8),
                      ('shortCol', numpy.int16), ('intCol', numpy.int32),
                      ('longCol', numpy.int64), ('floatCol', numpy.float32),
                      ('doubleCol', numpy.float64),
                      ('datetimeCol', numpy.dtype('datetime64[ns]')),
                      ('stringCol', object)]:
        # NB: I'm confident that dtype is not checked for df.equals(),
        # so it's not redundant to do both
        with self.subTest(msg='dtype nulls_convert=ERROR for {}'.format(col)):
            self.assertEqual(df[col].values.dtype, dtyp)
        with self.subTest(msg='dtype nulls_convert=PASS for {}'.format(col)):
            self.assertEqual(df_reg[col].values.dtype, dtyp)
        with self.subTest(msg='dtype nulls_convert=CONVERT for {}'.format(col)):
            # there are no nulls -> no dumb type casts
            self.assertEqual(df_reg_nc[col].values.dtype, dtyp)

    # VALUES TEST
    for col, val in [('boolCol', False), ('byteCol', 0), ('shortCol', 0),
                     ('intCol', 0), ('longCol', 0), ('floatCol', 0),
                     ('doubleCol', 0),
                     ('datetimeCol', numpy.datetime64(0, 'ns')),
                     ('stringCol', u'test')]:
        # NB: raw unicode string should be simultaneously python2/3 compliant
        with self.subTest(msg='entries for {}'.format(col)):
            self.assertEqual(df[col].values[0], val)
def setUpClass(cls):
    """
    Inherited method allowing initialization of test environment.

    Builds a 200-row sample table and a dictionary of numpy arrays keyed by
    the Java array type they should convert to.
    """

    cls.table = TableTools.emptyTable(200).update(
        "timestamp=new DateTime((long)(i/2)*1000000000)",
        "Sym=((i%2 == 0) ? `MSFT` : `AAPL`)",
        "price=(double)((i%2 == 0) ? 100.0 + (i/2) + 5*Math.random() : 250.0 + (i/2) + 10*Math.random())")

    # One value per minute over a full day, inclusive of both endpoints.
    seconds = numpy.arange(0, 86401, 60, dtype=numpy.int64)
    cls.arrays = {
        'DateTime[]': seconds.astype('datetime64[s]'),
        'long[]': seconds,
        'int[]': seconds.astype(numpy.int32),
        'float[]': seconds.astype(numpy.float32),
        'double[]': seconds.astype(numpy.float64),
    }
def setUpClass(cls):
    """
    Inherited method allowing initialization of test environment.

    Creates one 100-row table per primitive column type, plus numpy arrays
    holding the same values for comparison.
    """

    # Tables: X counts up, Y counts down from 100, Z counts up from -101
    # (sqrt-based for the floating point tables).
    cls.bool_table = TableTools.emptyTable(100).update(
        "X = true", "Y = false", "Z = (i % 2 == 0) ? true : false")
    cls.byte_table = TableTools.emptyTable(100).update(
        "X = (byte)i", "Y = (byte)(100 - X)", "Z = (byte)(-101 + X)")
    cls.short_table = TableTools.emptyTable(100).update(
        "X = (short)i", "Y = (short)(100 - X)", "Z = (short)(-101 + X)")
    cls.int_table = TableTools.emptyTable(100).update(
        "X = (int)i", "Y = 100 - X", "Z = -101 + X")
    cls.long_table = TableTools.emptyTable(100).update(
        "X = (long)i", "Y = 100 - X", "Z = -101 + X")
    cls.float_table = TableTools.emptyTable(100).update(
        "X = (float)i", "Y = (float)sqrt(X)", "Z = (float)sqrt(Y)")
    cls.double_table = TableTools.emptyTable(100).update(
        "X = (double)i", "Y = sqrt(X)", "Z = sqrt(Y)")

    # NumPy arrays mirroring the tables above.
    cls.bool_array = np.array(
        [[True, False, True], [True, False, False]] * 50, dtype=np.bool_)

    # Integer-typed arrays differ only in dtype; build them in one loop.
    for attr, dtype in (('byte_array', np.byte), ('short_array', np.short),
                        ('int_array', np.intc), ('long_array', np.int_)):
        setattr(cls, attr, np.vstack(
            (np.arange(0, 100, dtype=dtype),
             np.arange(100, 0, -1, dtype=dtype),
             np.arange(-101, -1, dtype=dtype))).T)

    # Floating point arrays use sqrt ramps instead of the descending ramps.
    for attr, dtype in (('float_array', np.single), ('double_array', np.double)):
        ramp = np.arange(0, 100, dtype=dtype)
        setattr(cls, attr,
                np.vstack((ramp, np.sqrt(ramp), np.sqrt(np.sqrt(ramp)))).T)
def test_column(self):
    """A vectorized function should work as a whole-column formula in update()."""
    result = TableTools.emptyTable(10) \
        .view("I=ii", "J=(ii * 2)") \
        .update("K = vectorized_func(I, J)")
    rendered = TableTools.html(result)
    self.assertIn("<td>9</td>", rendered)
def test_part_of_expr(self):
    """Embedding a vectorized function inside a larger expression should raise."""
    with self.assertRaises(Exception):
        TableTools.emptyTable(10).view(
            "I=ii", "J=(ii * 2)").update("K = 2 * vectorized_func(I, J)")
def test_filter(self):
    """A vectorized boolean function should work as a where() filter."""
    filtered = TableTools.emptyTable(10) \
        .view("I=ii", "J=(ii * 2)") \
        .where("vectorized_func(I, J)")
    rendered = TableTools.html(filtered)
    self.assertIn("<td>5</td><td>10</td>", rendered)
def test_wrong_return_type(self):
    """A where() filter whose function returns a non-boolean type should raise."""
    with self.assertRaises(Exception):
        TableTools.emptyTable(10).view("I=ii", "J=(ii * 2)").where(
            "vectorized_func_wrong_return_type(I, J)")
def test_long_number_conversion(self):
    """A Python int should round-trip through a long column unchanged."""
    table = TableTools.emptyTable(1).update("X = long_value")
    printed = TableTools.string(table, 1)
    # Third whitespace-separated token of the printed table is the cell value.
    self.assertEqual(long_value, int(printed.split()[2]))
def testTableToDataframeWithNulls(self):
    """
    Test for converting a basic table with null values to a dataframe.

    Row 0 holds regular values, row 1 holds the Deephaven null constant for
    each column; checks ERROR/PASS/CONVERT behavior, dtypes, and entries.
    """

    tab_nulls = TableTools.emptyTable(2).update(
        "boolCol=((i==0) ? true : null)",
        "byteCol=(byte)((i==0) ? 0 : NULL_BYTE)",
        "shortCol=(short)((i==0) ? 2 : NULL_SHORT)",
        "intCol=(int)((i==0) ? 0 : NULL_INT)",
        "longCol=(long)((i==0) ? 0 : NULL_LONG)",
        "floatCol=(float)((i==0) ? 2 : NULL_FLOAT)",
        "doubleCol=(double)((i==0) ? 2 : NULL_DOUBLE)",
        "datetimeCol=((i==0) ? new DateTime(0) : null)")

    with self.subTest(
            msg="Does not convert if convertNulls=ERROR and nulls present"):
        self.assertRaises(ValueError, tableToDataFrame, tab_nulls,
                          convertNulls='ERROR', categoricals=None)

    with self.subTest(
            msg="Converts if convertNulls in [PASS, CONVERT] and nulls present"):
        df_nulls = tableToDataFrame(tab_nulls, convertNulls='PASS',
                                    categoricals=None)
        df_nulls_nc = tableToDataFrame(tab_nulls, convertNulls='CONVERT',
                                       categoricals=None)
        # EQUALITY CHECK
        self.assertFalse(df_nulls.equals(df_nulls_nc))

    # DATA TYPES TEST
    # verify that the dtypes are as expected when we DO NOT convert the nulls
    for col, dtyp in [('boolCol', numpy.bool_), ('byteCol', numpy.int8),
                      ('shortCol', numpy.int16), ('intCol', numpy.int32),
                      ('longCol', numpy.int64), ('floatCol', numpy.float32),
                      ('doubleCol', numpy.float64),
                      ('datetimeCol', numpy.dtype('datetime64[ns]'))]:
        with self.subTest(
                msg='data type, nulls_convert=False, for {}'.format(col)):
            self.assertEqual(df_nulls[col].values.dtype, dtyp)  # as before

    # verify that the dtypes are as expected when we DO convert the nulls
    # BUGFIX: the deprecated alias numpy.object was removed in NumPy 1.24;
    # the builtin `object` is the documented equivalent.
    for col, dtyp in [
            ('boolCol', object), ('byteCol', numpy.float32),
            ('shortCol', numpy.float32), ('intCol', numpy.float64),
            ('longCol', numpy.float64), ('floatCol', numpy.float32),
            ('doubleCol', numpy.float64),
            ('datetimeCol', numpy.dtype('datetime64[ns]'))
    ]:
        with self.subTest(
                msg='data type, nulls_convert=True, for {}'.format(col)):
            self.assertEqual(df_nulls_nc[col].values.dtype, dtyp)

    # VALUES TEST
    # verify that the null entries are as expected when we DO NOT convert the nulls
    for col, val in [
        ('boolCol', False),
        ('byteCol', NULL_BYTE),
        ('shortCol', NULL_SHORT),
        ('intCol', NULL_INT),
        ('longCol', NULL_LONG),
    ]:
        with self.subTest(
                msg='null entry, nulls_convert=False, for {}'.format(col)):
            self.assertEqual(df_nulls[col].values[1], val)

    # floating point types & time converted to NaN/T regardless of null conversion
    with self.subTest(msg='null entry, nulls_convert=False, for floatCol'):
        self.assertTrue(numpy.isnan(df_nulls['floatCol'].values[1]))
    with self.subTest(msg='null entry, nulls_convert=False, for doubleCol'):
        self.assertTrue(numpy.isnan(df_nulls['doubleCol'].values[1]))
    with self.subTest(msg='null entry, nulls_convert=False, for datetimeCol'):
        self.assertTrue(numpy.isnat(df_nulls['datetimeCol'].values[1]))

    # verify that the null entries are as expected when we DO convert the nulls
    with self.subTest(msg='entries nulls_convert=True for bool'):
        self.assertIsNone(df_nulls_nc['boolCol'][1])
    for col in [
            'byteCol', 'shortCol', 'intCol', 'longCol', 'floatCol', 'doubleCol'
    ]:
        with self.subTest(msg='regular entry, nulls_convert=True, for {}'.
                          format(col)):
            self.assertFalse(numpy.isnan(df_nulls_nc[col].values[0]))
        with self.subTest(
                msg='null entry, nulls_convert=True, for {}'.format(col)):
            self.assertTrue(numpy.isnan(df_nulls_nc[col].values[1]))
    with self.subTest(
            msg='regular entry, nulls_convert=True, for datetimeCol'):
        self.assertEqual(df_nulls_nc['datetimeCol'].values[0],
                         numpy.datetime64(0, 'ns'))
    # BUGFIX: the original repeated the nulls_convert=False check on df_nulls
    # here with a stale `col` in the message; the intent was clearly the null
    # entry of the CONVERTED frame's datetime column.
    with self.subTest(msg='null entry, nulls_convert=True, for datetimeCol'):
        self.assertTrue(numpy.isnat(df_nulls_nc['datetimeCol'].values[1]))
def testArrayColumnConversion(self):
    """
    Test for behavior when one of the columns is of array type (in each direction).

    Groups a table of boxed values, converts to a dataframe, checks that each
    array column becomes a numpy array of the right dtype, then converts back
    and checks the resulting Vector column types.
    """

    firstTable = TableTools.emptyTable(10).update(
        "MyString=new String(`a`+i)",
        "MyChar=new Character((char) ((i%26)+97))",
        "MyBoolean=new Boolean(i%2==0)",
        "MyByte=new java.lang.Byte(Integer.toString(i%127))",
        "MyShort=new Short(Integer.toString(i%32767))",
        "MyInt=new Integer(i)",
        "MyLong=new Long(i)",
        "MyFloat=new Float(i+i/10)",
        "MyDouble=new Double(i+i/10)")
    arrayTable = firstTable.update("A=i%3").groupBy("A")
    dataFrame = tableToDataFrame(arrayTable, convertNulls='PASS',
                                 categoricals=None)

    for colName, arrayType in [
        ('MyString', 'io.deephaven.vector.ObjectVector'),
        ('MyChar', 'io.deephaven.vector.CharVector'),
        ('MyBoolean', 'io.deephaven.vector.ObjectVector'),  # NB: BooleanVector is deprecated
        ('MyByte', 'io.deephaven.vector.ByteVector'),
        ('MyShort', 'io.deephaven.vector.ShortVector'),
        ('MyInt', 'io.deephaven.vector.IntVector'),
        ('MyLong', 'io.deephaven.vector.LongVector'),
        ('MyFloat', 'io.deephaven.vector.FloatVector'),
        ('MyDouble', 'io.deephaven.vector.DoubleVector'),
    ]:
        with self.subTest(msg="type for original column {}".format(colName)):
            self.assertEqual(
                arrayTable.getColumn(colName).getType().getName(), arrayType)
            # BUGFIX: the deprecated alias numpy.object was removed in
            # NumPy 1.24; the builtin `object` is the documented equivalent.
            self.assertEqual(dataFrame[colName].values.dtype, object)

    for colName, dtype in [
        ('MyBoolean', numpy.bool_),
        ('MyByte', numpy.int8),
        ('MyShort', numpy.int16),
        ('MyInt', numpy.int32),
        ('MyLong', numpy.int64),
        ('MyFloat', numpy.float32),
        ('MyDouble', numpy.float64),
    ]:
        with self.subTest(
                msg="type of converted array for {}".format(colName)):
            self.assertTrue(
                isinstance(dataFrame[colName].values[0], numpy.ndarray))
            self.assertEqual(dataFrame[colName].values[0].dtype, dtype)

    with self.subTest(msg="type of converted array for MyString"):
        self.assertTrue(
            isinstance(dataFrame['MyString'].values[0], numpy.ndarray))
        self.assertTrue(
            dataFrame['MyString'].values[0].dtype.name.startswith('unicode')
            or dataFrame['MyString'].values[0].dtype.name.startswith('str'))

    # NB: numpy really doesn't have a char type, so it gets treated like an
    # uninterpretted type
    with self.subTest(msg="type of converted array for MyChar"):
        self.assertTrue(
            isinstance(dataFrame['MyChar'].values[0], numpy.ndarray))
        self.assertTrue(
            dataFrame['MyChar'].values[0].dtype.name.startswith('unicode')
            or dataFrame['MyChar'].values[0].dtype.name.startswith('str'))

    # convert back
    backTable = dataFrameToTable(dataFrame, convertUnknownToString=True)
    for colName, arrayType in [
        ('MyString', 'io.deephaven.vector.ObjectVectorDirect'),
        ('MyChar', 'io.deephaven.vector.CharVectorDirect'),
        ('MyBoolean', 'io.deephaven.vector.ObjectVectorDirect'),
        ('MyByte', 'io.deephaven.vector.ByteVectorDirect'),
        ('MyShort', 'io.deephaven.vector.ShortVectorDirect'),
        ('MyInt', 'io.deephaven.vector.IntVectorDirect'),
        ('MyLong', 'io.deephaven.vector.LongVectorDirect'),
        ('MyFloat', 'io.deephaven.vector.FloatVectorDirect'),
        ('MyDouble', 'io.deephaven.vector.DoubleVectorDirect'),
    ]:
        with self.subTest(
                msg="type for reverted column for {}".format(colName)):
            self.assertEqual(
                backTable.getColumn(colName).getType().getName(), arrayType)
    with self.subTest(msg="element type for reverted column MyBoolean"):
        self.assertEqual(
            backTable.getColumn('MyBoolean').get(0).getComponentType().getName(),
            'java.lang.Boolean')
    with self.subTest(msg="element type for reverted column MyString"):
        self.assertEqual(
            backTable.getColumn('MyString').get(0).getComponentType().getName(),
            'java.lang.String')