def testCreateTableFromData(self):
        """
        Exercise createTableFromData with list-of-lists input and with dict input
        """

        column_names = [
            'intList', 'floatList', 'charList', 'stringList', 'booleanList',
            'timeList'
        ]
        # one inner list per column name; all but the timestamp column end in None
        column_values = [
            [1, 2, None],
            [1., 2., None],
            ['A', 'B', None],
            [u'one', u'two', None],
            [True, False, None],
            [datetime.utcnow(), datetime.utcnow(), datetime.utcnow()],
        ]

        with self.subTest(msg="createTableFromData with lists"):
            list_table = createTableFromData(column_values,
                                             columns=column_names)
            print("tableFromList = {}\n".format(TableTools.html(list_table)))

        # same data, keyed by column name
        data_by_name = dict(zip(column_names, column_values))
        with self.subTest(msg="createTableFromData with dict"):
            dict_table = createTableFromData(data_by_name,
                                             columns=column_names)
            print("tableFromDict = {}\n".format(TableTools.html(dict_table)))
示例#2
0
 def test_pyobj_field_access(self):
     """
     Reference fields of the externally defined `pyobj` inside update and
     where expressions, then check the rendered HTML for the expected values.
     """
     base = TableTools.emptyTable(10)
     with_columns = base.update(
         "SYM = `AAPL-` + (String)pyobj.name",
         "PRICE = i * 1000")
     filtered = with_columns.where("PRICE > (int)pyobj.price + 100")
     rendered = TableTools.html(filtered)
     for expected in ("AAPL-GOOG", "2000"):
         self.assertIn(expected, rendered)
    def testListColumnVersion(self):
        """
        Convert data frames whose labels or cell values are tuples or lists
        into tables and print both representations (no assertions are made)
        """

        # case 1: tuple keys at both levels of a nested dict
        tuple_keyed = {
            ('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
            ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
            ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
            ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
            ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10},
        }
        frame1 = pandas.DataFrame(tuple_keyed)
        converted1 = dataFrameToTable(frame1)
        print("dataframe1 = \n{}".format(frame1))
        print("table1 = {}\n".format(TableTools.html(converted1)))

        # case 2: cells holding tuples of varying length, plus an all-None column
        tuple_cells = {
            'one': [(1, 2), (2, 3), (3, ), (4, 5, 6, 7)],
            'two': [(4, 5), (6, 5, 3), (7, 6), (8, 7)],
            'thing': [None, None, None, None],
        }
        frame2 = pandas.DataFrame(tuple_cells)

        converted2 = dataFrameToTable(frame2, convertUnknownToString=True)
        print("dataframe2 = \n{}".format(frame2))
        print("table2 = {}\n".format(TableTools.html(converted2)))

        # case 3: cells holding lists, plus an all-None column
        list_cells = {
            'one': [[1, 2], [2, 3], [3, 4], [4, 5, 6, 7]],
            'two': [[4, 5], [6, 5], [7, 6], [8, 7]],
            'thing': [None, None, None, None],
        }
        frame3 = pandas.DataFrame(list_cells)

        converted3 = dataFrameToTable(frame3, convertUnknownToString=True)
        print("dataframe3 = \n{}".format(frame3))
        print("table3 = {}\n".format(TableTools.html(converted3)))
示例#4
0
    def testAggMethods(self):
        """
        Coverage test: build one of each aggregation and run them through aggBy
        """
        # throwaway table: grouping key `dumb`, values `var`, weights `weights`
        source = TableTools.emptyTable(10).update(
            "dumb=(int)(i/5)",
            "var=(int)i",
            "weights=(double)1.0/(i+1)")

        # one instance of every aggregation under test; results are not inspected
        aggregations = [
            Aggregation.AggGroup("aggGroup=var"),
            Aggregation.AggAvg("aggAvg=var"),
            Aggregation.AggCount("aggCount"),
            Aggregation.AggFirst("aggFirst=var"),
            Aggregation.AggLast("aggLast=var"),
            Aggregation.AggMax("aggMax=var"),
            Aggregation.AggMed("aggMed=var"),
            Aggregation.AggMin("aggMin=var"),
            Aggregation.AggPct(0.20, "aggPct=var"),
            Aggregation.AggStd("aggStd=var"),
            Aggregation.AggSum("aggSum=var"),
            Aggregation.AggAbsSum("aggAbsSum=var"),
            Aggregation.AggVar("aggVar=var"),
            Aggregation.AggWAvg("var", "weights"),
        ]

        # copy the Python list into the _JArrayList handed to aggBy
        java_aggregations = _JArrayList()
        for aggregation in aggregations:
            java_aggregations.add(aggregation)

        source.aggBy(java_aggregations, "dumb")
        # TODO: AggFormula - this is terrible
        del source
示例#5
0
 @classmethod
 def setUpClass(cls):
     """
     Inherited method allowing initialization of test environment

     Stores a 200-row table with `timestamp`, `Sym` and `price` columns on
     the class as `table`.
     """
     # unittest calls setUpClass on the class with no arguments, so it must be
     # a classmethod; the receiver is named `cls` accordingly.
     cls.table = TableTools.emptyTable(200).update(
         "timestamp=new DateTime((long)(i/2)*1000000000)",
         "Sym=((i%2 == 0) ? `MSFT` : `AAPL`)",
         "price=(double)((i%2 == 0) ? 100.0 + (i/2) + 5*Math.random() : 250.0 + (i/2) + 10*Math.random())"
     )
    def testTableToDataframeNoNulls(self):
        """
        Test for converting a basic table with no null values to a dataframe

        A one-row table is converted once per `convertNulls` mode ('ERROR',
        'PASS', 'CONVERT'). The three dataframes are then checked for mutual
        equality, for the expected dtype of every column, and (for the 'ERROR'
        dataframe) for the expected value in every column.
        """

        tab_reg = TableTools.emptyTable(1).update(
            "boolCol=(boolean)false", "byteCol=(byte)0", "shortCol=(short)0",
            "intCol=(int)0", "longCol=(long)0", "floatCol=(float)0",
            "doubleCol=(double)0", "datetimeCol=new DateTime(0)",
            "stringCol=`test`")
        # there are no nulls here, so all three conversion options should work, and result in identical dataframes
        with self.subTest(msg="convert null when no null values"):
            df = tableToDataFrame(tab_reg,
                                  convertNulls='ERROR',
                                  categoricals=None)
            df_reg = tableToDataFrame(tab_reg,
                                      convertNulls='PASS',
                                      categoricals=None)
            df_reg_nc = tableToDataFrame(tab_reg,
                                         convertNulls='CONVERT',
                                         categoricals=None)

        # EQUALITY CHECK
        with self.subTest(msg='converted dfs are equal'):
            self.assertTrue(df.equals(df_reg))  # equals is transitive
            self.assertTrue(df_reg.equals(df_reg_nc))

        # DATA TYPE TEST
        # numpy.object_ is used instead of the numpy.object alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        expected_dtypes = [
            ('boolCol', numpy.bool_),
            ('byteCol', numpy.int8),
            ('shortCol', numpy.int16),
            ('intCol', numpy.int32),
            ('longCol', numpy.int64),
            ('floatCol', numpy.float32),
            ('doubleCol', numpy.float64),
            ('datetimeCol', numpy.dtype('datetime64[ns]')),
            ('stringCol', numpy.object_),
        ]
        frames_by_mode = [('ERROR', df), ('PASS', df_reg),
                          ('CONVERT', df_reg_nc)]
        for col, dtyp in expected_dtypes:
            # NB: I'm confident that dtype is not checked for df.equals(), so it's not redundant to do both
            for mode, frame in frames_by_mode:
                # there are no nulls -> no dumb type casts, even for CONVERT
                with self.subTest(
                        msg='dtype nulls_convert={} for {}'.format(mode, col)):
                    self.assertEqual(frame[col].values.dtype, dtyp)

        # VALUES TEST
        expected_values = [
            ('boolCol', False),
            ('byteCol', 0),
            ('shortCol', 0),
            ('intCol', 0),
            ('longCol', 0),
            ('floatCol', 0),
            ('doubleCol', 0),
            ('datetimeCol', numpy.datetime64(0, 'ns')),
            # NB: raw unicode string should be simultaneously python2/3 compliant
            ('stringCol', u'test'),
        ]
        for col, val in expected_values:
            with self.subTest(msg='entries for {}'.format(col)):
                self.assertEqual(df[col].values[0], val)
示例#7
0
    @classmethod
    def setUpClass(cls):
        """
        Inherited method allowing initialization of test environment

        Stores two fixtures on the class: `table`, a 200-row table with
        `timestamp`, `Sym` and `price` columns, and `arrays`, a dict of numpy
        arrays keyed by type-name strings such as 'long[]'.
        """
        # unittest calls setUpClass on the class with no arguments, so it must
        # be decorated as a classmethod.
        cls.table = TableTools.emptyTable(200).update(
            "timestamp=new DateTime((long)(i/2)*1000000000)",
            "Sym=((i%2 == 0) ? `MSFT` : `AAPL`)",
            "price=(double)((i%2 == 0) ? 100.0 + (i/2) + 5*Math.random() : 250.0 + (i/2) + 10*Math.random())")

        # 0, 60, ..., 86400: the same values are reused under every dtype below
        longs = numpy.arange(0, 86401, 60, dtype=numpy.int64)
        cls.arrays = {
            'DateTime[]': longs.astype('datetime64[s]'),
            'long[]': longs,
            'int[]': longs.astype(numpy.int32),
            'float[]': longs.astype(numpy.float32),
            'double[]': longs.astype(numpy.float64),
        }
 @classmethod
 def setUpClass(cls):
     """
     Inherited method allowing initialization of test environment

     For each of bool, byte, short, int, long, float and double this stores a
     100-row table with columns X, Y and Z on the class, together with a
     100x3 NumPy array built from the same formulas.
     """
     # unittest calls setUpClass on the class with no arguments, so it must be
     # decorated as a classmethod.

     # Tables
     cls.bool_table = TableTools.emptyTable(100).update(
         "X = true", "Y = false", "Z = (i % 2 == 0) ? true : false")
     cls.byte_table = TableTools.emptyTable(100).update(
         "X = (byte)i", "Y = (byte)(100 - X)", "Z = (byte)(-101 + X)")
     cls.short_table = TableTools.emptyTable(100).update(
         "X = (short)i", "Y = (short)(100 - X)", "Z = (short)(-101 + X)")
     cls.int_table = TableTools.emptyTable(100).update(
         "X = (int)i", "Y = 100 - X", "Z = -101 + X")
     cls.long_table = TableTools.emptyTable(100).update(
         "X = (long)i", "Y = 100 - X", "Z = -101 + X")
     cls.float_table = TableTools.emptyTable(100).update(
         "X = (float)i", "Y = (float)sqrt(X)", "Z = (float)sqrt(Y)")
     cls.double_table = TableTools.emptyTable(100).update(
         "X = (double)i", "Y = sqrt(X)", "Z = sqrt(Y)")

     # NumPy arrays: each row is one (X, Y, Z) triple
     cls.bool_array = np.array(
         [[True, False, True], [True, False, False]] * 50,
         dtype=np.bool_)
     cls.byte_array = np.vstack((
         np.arange(0, 100, dtype=np.byte),
         np.arange(100, 0, -1, dtype=np.byte),
         np.arange(-101, -1, dtype=np.byte),
     )).T
     cls.short_array = np.vstack((
         np.arange(0, 100, dtype=np.short),
         np.arange(100, 0, -1, dtype=np.short),
         np.arange(-101, -1, dtype=np.short),
     )).T
     cls.int_array = np.vstack((
         np.arange(0, 100, dtype=np.intc),
         np.arange(100, 0, -1, dtype=np.intc),
         np.arange(-101, -1, dtype=np.intc),
     )).T
     # NOTE(review): np.int_ has a platform-dependent width; confirm it is
     # 64-bit wherever this is compared against the table's long columns.
     cls.long_array = np.vstack((
         np.arange(0, 100, dtype=np.int_),
         np.arange(100, 0, -1, dtype=np.int_),
         np.arange(-101, -1, dtype=np.int_),
     )).T
     cls.float_array = np.vstack((
         np.arange(0, 100, dtype=np.single),
         np.sqrt(np.arange(0, 100, dtype=np.single)),
         np.sqrt(np.sqrt(np.arange(0, 100, dtype=np.single))),
     )).T
     cls.double_array = np.vstack((
         np.arange(0, 100, dtype=np.double),
         np.sqrt(np.arange(0, 100, dtype=np.double)),
         np.sqrt(np.sqrt(np.arange(0, 100, dtype=np.double))),
     )).T
 def test_column(self):
     """
     Call vectorized_func as the whole right-hand side of an update formula
     and check the rendered HTML for the expected cell.
     """
     source = TableTools.emptyTable(10).view("I=ii", "J=(ii * 2)")
     result = source.update("K = vectorized_func(I, J)")
     rendered = TableTools.html(result)
     self.assertIn("<td>9</td>", rendered)
 def test_part_of_expr(self):
     """
     Expect an exception when vectorized_func is used as a sub-expression
     of an update formula.
     """
     with self.assertRaises(Exception):
         # table construction stays inside the context, as in the original
         source = TableTools.emptyTable(10).view("I=ii", "J=(ii * 2)")
         source.update("K = 2 * vectorized_func(I, J)")
示例#11
0
 def test_filter(self):
     """
     Use vectorized_func as a where() filter and check the rendered HTML
     for the expected row.
     """
     source = TableTools.emptyTable(10).view("I=ii", "J=(ii * 2)")
     kept = source.where("vectorized_func(I, J)")
     rendered = TableTools.html(kept)
     self.assertIn("<td>5</td><td>10</td>", rendered)
示例#12
0
 def test_wrong_return_type(self):
     """
     Expect an exception when vectorized_func_wrong_return_type is used as
     a where() filter.
     """
     with self.assertRaises(Exception):
         # table construction stays inside the context, as in the original
         source = TableTools.emptyTable(10).view("I=ii", "J=(ii * 2)")
         source.where("vectorized_func_wrong_return_type(I, J)")
示例#13
0
 def test_long_number_conversion(self):
     """
     Put the externally defined long_value into a table column, render the
     table as text, and check the value parses back to the same integer.
     """
     updated = TableTools.emptyTable(1).update("X = long_value")
     rendered = TableTools.string(updated, 1)
     # the third whitespace-separated token of the rendered text is compared
     tokens = rendered.split()
     self.assertEqual(long_value, int(tokens[2]))
    def testTableToDataframeWithNulls(self):
        """
        Test for converting a basic table with null values to a dataframe

        A two-row table is built whose first row holds regular values and whose
        second row holds a null in every column. Conversion must raise
        ValueError for convertNulls='ERROR' and succeed for 'PASS' and
        'CONVERT'. The dtypes and the null representations of both resulting
        dataframes are then checked.
        """

        tab_nulls = TableTools.emptyTable(2).update(
            "boolCol=((i==0) ? true : null)",
            "byteCol=(byte)((i==0) ? 0 : NULL_BYTE)",
            "shortCol=(short)((i==0) ? 2 : NULL_SHORT)",
            "intCol=(int)((i==0) ? 0 : NULL_INT)",
            "longCol=(long)((i==0) ? 0 : NULL_LONG)",
            "floatCol=(float)((i==0) ? 2 : NULL_FLOAT)",
            "doubleCol=(double)((i==0) ? 2 : NULL_DOUBLE)",
            "datetimeCol=((i==0) ? new DateTime(0) : null)")
        with self.subTest(
                msg="Does not convert if convertNulls=ERROR and nulls present"
        ):
            self.assertRaises(ValueError,
                              tableToDataFrame,
                              tab_nulls,
                              convertNulls='ERROR',
                              categoricals=None)
        with self.subTest(
                msg=
                "Converts if convertNulls in [PASS, CONVERT] and nulls present"
        ):
            df_nulls = tableToDataFrame(tab_nulls,
                                        convertNulls='PASS',
                                        categoricals=None)
            df_nulls_nc = tableToDataFrame(tab_nulls,
                                           convertNulls='CONVERT',
                                           categoricals=None)

        # EQUALITY CHECK
        self.assertFalse(df_nulls.equals(df_nulls_nc))

        # DATA TYPES TEST
        # verify that the dtypes are as expected when we DO NOT convert the nulls
        for col, dtyp in [('boolCol', numpy.bool_), ('byteCol', numpy.int8),
                          ('shortCol', numpy.int16), ('intCol', numpy.int32),
                          ('longCol', numpy.int64),
                          ('floatCol', numpy.float32),
                          ('doubleCol', numpy.float64),
                          ('datetimeCol', numpy.dtype('datetime64[ns]'))]:
            with self.subTest(
                    msg='data type, nulls_convert=False, for {}'.format(col)):
                self.assertEqual(df_nulls[col].values.dtype, dtyp)  # as before
        # verify that the dtypes are as expected when we DO convert the nulls
        # numpy.object_ is used instead of the numpy.object alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        for col, dtyp in [
            ('boolCol', numpy.object_), ('byteCol', numpy.float32),
            ('shortCol', numpy.float32), ('intCol', numpy.float64),
            ('longCol', numpy.float64), ('floatCol', numpy.float32),
            ('doubleCol', numpy.float64),
            ('datetimeCol', numpy.dtype('datetime64[ns]'))
        ]:
            with self.subTest(
                    msg='data type, nulls_convert=True, for {}'.format(col)):
                self.assertEqual(df_nulls_nc[col].values.dtype, dtyp)

        # VALUES TEST
        # verify that the null entries are as expected when we DO NOT convert the nulls
        for col, val in [
            ('boolCol', False),
            ('byteCol', NULL_BYTE),
            ('shortCol', NULL_SHORT),
            ('intCol', NULL_INT),
            ('longCol', NULL_LONG),
        ]:
            with self.subTest(
                    msg='null entry, nulls_convert=False, for {}'.format(col)):
                self.assertEqual(df_nulls[col].values[1], val)
        # floating point types & time converted to NaN/T regardless of null conversion
        with self.subTest(msg='null entry, nulls_convert=False, for floatCol'):
            self.assertTrue(numpy.isnan(df_nulls['floatCol'].values[1]))
        with self.subTest(
                msg='null entry, nulls_convert=False, for doubleCol'):
            self.assertTrue(numpy.isnan(df_nulls['doubleCol'].values[1]))
        with self.subTest(
                msg='null entry, nulls_convert=False, for datetimeCol'):
            self.assertTrue(numpy.isnat(df_nulls['datetimeCol'].values[1]))
        # verify that the null entries are as expected when we DO convert the nulls
        with self.subTest(msg='entries nulls_convert=True for bool'):
            self.assertIsNone(df_nulls_nc['boolCol'][1])
        for col in [
                'byteCol', 'shortCol', 'intCol', 'longCol', 'floatCol',
                'doubleCol'
        ]:
            with self.subTest(msg='regular entry, nulls_convert=True, for {}'.
                              format(col)):
                self.assertFalse(numpy.isnan(df_nulls_nc[col].values[0]))
            with self.subTest(
                    msg='null entry, nulls_convert=True, for {}'.format(col)):
                self.assertTrue(numpy.isnan(df_nulls_nc[col].values[1]))
        with self.subTest(
                msg='regular entry, nulls_convert=True, for datetimeCol'):
            self.assertEqual(df_nulls_nc['datetimeCol'].values[0],
                             numpy.datetime64(0, 'ns'))
        # This check previously re-tested df_nulls (the PASS dataframe) and
        # built its message from the leaked loop variable `col`; it now checks
        # the CONVERT dataframe, as this section is meant to.
        with self.subTest(
                msg='null entry, nulls_convert=True, for datetimeCol'):
            self.assertTrue(numpy.isnat(df_nulls_nc['datetimeCol'].values[1]))
    def testArrayColumnConversion(self):
        """
        Test for behavior when one of the columns is of array type (in each direction)

        A 10-row table of boxed values is grouped by `A=i%3`, converted to a
        dataframe, and converted back to a table. Column types are checked on
        the grouped table, on the dataframe, and on the round-tripped table.
        """

        firstTable = TableTools.emptyTable(10).update(
            "MyString=new String(`a`+i)",
            "MyChar=new Character((char) ((i%26)+97))",
            "MyBoolean=new Boolean(i%2==0)",
            "MyByte=new java.lang.Byte(Integer.toString(i%127))",
            "MyShort=new Short(Integer.toString(i%32767))",
            "MyInt=new Integer(i)", "MyLong=new Long(i)",
            "MyFloat=new Float(i+i/10)", "MyDouble=new Double(i+i/10)")
        arrayTable = firstTable.update("A=i%3").groupBy("A")
        dataFrame = tableToDataFrame(arrayTable,
                                     convertNulls='PASS',
                                     categoricals=None)

        for colName, arrayType in [
            ('MyString', 'io.deephaven.vector.ObjectVector'),
            ('MyChar', 'io.deephaven.vector.CharVector'),
            ('MyBoolean', 'io.deephaven.vector.ObjectVector'
             ),  # NB: BooleanVector is deprecated
            ('MyByte', 'io.deephaven.vector.ByteVector'),
            ('MyShort', 'io.deephaven.vector.ShortVector'),
            ('MyInt', 'io.deephaven.vector.IntVector'),
            ('MyLong', 'io.deephaven.vector.LongVector'),
            ('MyFloat', 'io.deephaven.vector.FloatVector'),
            ('MyDouble', 'io.deephaven.vector.DoubleVector'),
        ]:
            with self.subTest(
                    msg="type for original column {}".format(colName)):
                self.assertEqual(
                    arrayTable.getColumn(colName).getType().getName(),
                    arrayType)
                # numpy.object_ is used instead of the numpy.object alias,
                # which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
                self.assertEqual(dataFrame[colName].values.dtype,
                                 numpy.object_)

        for colName, dtype in [
            ('MyBoolean', numpy.bool_),
            ('MyByte', numpy.int8),
            ('MyShort', numpy.int16),
            ('MyInt', numpy.int32),
            ('MyLong', numpy.int64),
            ('MyFloat', numpy.float32),
            ('MyDouble', numpy.float64),
        ]:
            with self.subTest(
                    msg="type of converted array for {}".format(colName)):
                firstCell = dataFrame[colName].values[0]
                self.assertIsInstance(firstCell, numpy.ndarray)
                self.assertEqual(firstCell.dtype, dtype)

        # the dtype name is accepted with either prefix
        textPrefixes = ('unicode', 'str')

        with self.subTest(msg="type of converted array for MyString"):
            firstCell = dataFrame['MyString'].values[0]
            self.assertIsInstance(firstCell, numpy.ndarray)
            self.assertTrue(firstCell.dtype.name.startswith(textPrefixes))

        # NB: numpy really doesn't have a char type, so it gets treated like an uninterpretted type
        with self.subTest(msg="type of converted array for MyChar"):
            firstCell = dataFrame['MyChar'].values[0]
            self.assertIsInstance(firstCell, numpy.ndarray)
            self.assertTrue(firstCell.dtype.name.startswith(textPrefixes))

        # convert back
        backTable = dataFrameToTable(dataFrame, convertUnknownToString=True)
        for colName, arrayType in [
            ('MyString', 'io.deephaven.vector.ObjectVectorDirect'),
            ('MyChar', 'io.deephaven.vector.CharVectorDirect'),
            ('MyBoolean', 'io.deephaven.vector.ObjectVectorDirect'),
            ('MyByte', 'io.deephaven.vector.ByteVectorDirect'),
            ('MyShort', 'io.deephaven.vector.ShortVectorDirect'),
            ('MyInt', 'io.deephaven.vector.IntVectorDirect'),
            ('MyLong', 'io.deephaven.vector.LongVectorDirect'),
            ('MyFloat', 'io.deephaven.vector.FloatVectorDirect'),
            ('MyDouble', 'io.deephaven.vector.DoubleVectorDirect'),
        ]:
            with self.subTest(
                    msg="type for reverted column for {}".format(colName)):
                self.assertEqual(
                    backTable.getColumn(colName).getType().getName(),
                    arrayType)
        with self.subTest(msg="element type for reverted column MyBoolean"):
            self.assertEqual(
                backTable.getColumn('MyBoolean').get(
                    0).getComponentType().getName(), 'java.lang.Boolean')
        with self.subTest(msg="element type for reverted column MyString"):
            self.assertEqual(
                backTable.getColumn('MyString').get(
                    0).getComponentType().getName(), 'java.lang.String')