def setUp(self):
    # Shared fixture: a text column 'one' followed by three number columns.
    # The same Number instance is reused for every numeric column.
    self.number_type = Number()
    self.text_type = Text()

    self.column_names = ['one', 'two', 'three', 'four']
    self.column_types = [self.text_type] + [self.number_type] * 3

    self.rows = (
        ('a', 2, 3, 4),
        (None, 3, 5, None),
        ('a', 2, 4, None),
        ('b', 3, 6, None),
    )

    self.table = Table(self.rows, self.column_names, self.column_types)
def setUp(self):
    # Shared fixture: two number columns and a text column; the last row
    # has None in column 'one'.
    number = Number()
    text = Text()
    self.number_type = number
    self.text_type = text

    self.column_names = ['one', 'two', 'three']
    self.column_types = [number, number, text]

    self.rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
    )

    self.table = Table(self.rows, self.column_names, self.column_types)
def test_group_by_key_type(self):
    # Group on column 'two' while forcing the key type to Text: the
    # resulting keys are expected to be the strings '2' and '3'.
    source = Table(self.rows, self.column_names, self.column_types)

    grouped = source.group_by('two', key_type=Text())

    self.assertIsInstance(grouped, TableSet)
    self.assertEqual(grouped.key_name, 'two')
    self.assertIsInstance(grouped.key_type, Text)

    for key in ('2', '3'):
        self.assertIn(key, grouped.keys())

    self.assertSequenceEqual(grouped['2'].columns['one'], ('a', 'a'))
    self.assertSequenceEqual(grouped['3'].columns['one'], (None, 'b'))
def test_group_by_function(self):
    # Group with a callable key (is column 'three' below 5?) typed as
    # Boolean; the key name asserted for a callable key is 'group'.
    source = Table(self.rows, self.column_names, self.column_types)

    grouped = source.group_by(
        lambda row: row['three'] < 5,
        key_type=Boolean()
    )

    self.assertIsInstance(grouped, TableSet)
    self.assertEqual(len(grouped), 2)
    self.assertEqual(grouped.key_name, 'group')

    for key in (True, False):
        self.assertIn(key, grouped.keys())

    self.assertSequenceEqual(grouped[True].columns['one'], ('a', 'a', 'b'))
    self.assertSequenceEqual(grouped[False].columns['one'], (None,))
def test_inner_join_same_column_name(self):
    # Inner-join on 'one', a column name present in both tables; the
    # joined table is expected to carry that name only once.
    right = Table(
        self.right_rows,
        ['four', 'one', 'six'],
        self.column_types
    )

    joined = self.left.join(right, 'one', inner=True)

    # The join must hand back a new table, not either of its inputs.
    self.assertIsNot(joined, self.left)
    self.assertIsNot(joined, right)

    self.assertColumnNames(joined, ['one', 'two', 'three', 'four', 'six'])
    self.assertColumnTypes(joined, [Number, Number, Text, Number, Text])
    self.assertRows(joined, [
        (2, 3, 'b', None, 'c')
    ])
def test_print_table_max_column_width(self):
    # Print with max_column_width=7 and check that over-long header and
    # cell text is shortened with '...' while short text is left intact.
    column_names = ['one', 'two', 'also, this is long']
    rows = (
        ('1.7', 2, 'this is long'),
        ('11.18', None, None),
        ('0', 1, 'nope'),
    )
    table = Table(rows, column_names, self.column_types)

    buffer = six.StringIO()
    table.print_table(output=buffer, max_column_width=7)
    lines = buffer.getvalue().split('\n')

    # (line index, fragment expected on that line)
    expectations = (
        (0, ' also... '),
        (2, ' this... '),
        (4, ' nope '),
    )

    for line_index, fragment in expectations:
        self.assertIn(fragment, lines[line_index])
def test_from_json_file_like_object(self):
    # Load a table from an already-open file object and compare it with
    # a table built directly from the fixture rows.
    expected = Table(self.rows, self.column_names, self.column_types)

    # On Python 2 the file is opened without an encoding argument;
    # otherwise it is opened explicitly as UTF-8.
    open_kwargs = {} if six.PY2 else {'encoding': 'utf-8'}

    with open('examples/test.json', **open_kwargs) as f:
        loaded = Table.from_json(f)

    self.assertColumnNames(loaded, self.column_names)
    self.assertColumnTypes(
        loaded,
        [Number, Text, Boolean, Date, DateTime, TimeDelta]
    )
    self.assertRows(loaded, expected.rows)
def test_count(self):
    # Count(value) aggregated over column 'one' should report how many
    # times that value occurs, including None.
    rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
        (1, 2, 'a'),
        (1, 2, 'a'),
    )
    table = Table(rows, self.column_names, self.column_types)

    # (value to count, expected number of occurrences)
    cases = (
        (1, 3),
        (4, 0),
        (None, 1),
    )

    for value, occurrences in cases:
        self.assertEqual(
            table.columns['one'].aggregate(Count(value)),
            occurrences
        )
def test_denormalize_default_value(self):
    # Denormalize on a two-column key with default_value='hello'; the
    # expected rows show 'hello' wherever a property has no value.
    source = Table(self.rows, self.column_names, self.column_types)

    wide = source.denormalize(
        ['first_name', 'last_name'],
        'property',
        'value',
        default_value='hello'
    )

    expected_rows = (
        ('Jane', 'Code', 'female', '27'),
        ('Jim', 'Program', 'male', 'hello'),
        ('Jim', 'Bytes', 'hello', '24'),
    )

    self.assertRows(wide, expected_rows)
    self.assertColumnNames(
        wide,
        ['first_name', 'last_name', 'gender', 'age']
    )
    self.assertColumnTypes(wide, [Text, Text, Text, Text])
def test_max(self):
    # Max over a DateTime column: the aggregate's data type is DateTime,
    # validation passes, and the latest timestamp is returned.
    latest = datetime.datetime(1994, 3, 3, 6, 31)

    rows = [
        [latest],
        [datetime.datetime(1994, 3, 3, 6, 30, 30)],
        [datetime.datetime(1994, 3, 3, 6, 30)],
    ]

    table = Table(rows, ['test'], [DateTime()])

    data_type = Max('test').get_aggregate_data_type(table)
    self.assertIsInstance(data_type, DateTime)

    Max('test').validate(table)

    result = Max('test').run(table)
    self.assertEqual(result, latest)
def test_to_csv(self):
    # Write the fixture table to a scratch CSV file and compare its text
    # with the reference file examples/test.csv.
    table = Table(self.rows, self.column_names, self.column_types)

    try:
        table.to_csv('.test.csv')

        with open('.test.csv') as f:
            contents1 = f.read()

        with open('examples/test.csv') as f:
            contents2 = f.read()

        self.assertEqual(contents1, contents2)
    finally:
        # Clean up even when the write or the comparison fails, so a
        # failing run does not leave the scratch file in the working
        # directory. The existence check keeps a failed write from
        # masking the original error with a missing-file error.
        if os.path.exists('.test.csv'):
            os.remove('.test.csv')
def test_all(self):
    # All('test', ...) over a Boolean column, first with a None among the
    # values and then with every value True.
    with_none = Table([[True], [True], [None]], ['test'], [Boolean()])

    All('test', True).validate(with_none)
    self.assertEqual(All('test', True).run(with_none), False)

    all_true = Table([[True], [True], [True]], ['test'], [Boolean()])

    All('test', True).validate(all_true)

    # The test may be given either as a literal value or as a callable.
    self.assertEqual(All('test', True).run(all_true), True)
    self.assertEqual(All('test', lambda value: value).run(all_true), True)
    self.assertEqual(All('test', False).run(all_true), False)
    self.assertEqual(
        All('test', lambda value: not value).run(all_true),
        False
    )
def test_denormalize(self):
    # Denormalize on the single key 'first_name': one output row per
    # name, one column per property, and row names taken from the key.
    source = Table(self.rows, self.column_names, self.column_types)

    wide = source.denormalize('first_name', 'property', 'value')

    expected_rows = (
        ('Jane', 'female', 27),
        ('Jim', 'male', 24),
    )

    self.assertRows(wide, expected_rows)
    self.assertColumnNames(wide, ['first_name', 'gender', 'age'])
    self.assertColumnTypes(wide, [Text, Text, Number])
    self.assertRowNames(wide, ['Jane', 'Jim'])
def test_bins_decimals(self):
    # Bin the decimals 0.00 .. 0.99 with the default bins() arguments
    # and spot-check three of the resulting bins.
    hundred = Decimal('100')
    rows = [[Decimal(i) / hundred] for i in range(0, 100)]

    columns = (
        ('number', self.number_type),
    )

    binned = Table(rows, columns).bins('number')

    # (bin row index, expected [label, count])
    expected_bins = (
        (0, ['[0.0 - 0.1)', 10]),
        (3, ['[0.3 - 0.4)', 10]),
        (9, ['[0.9 - 1.0]', 10]),
    )

    for index, expected in expected_bins:
        self.assertSequenceEqual(binned.rows[index], expected)
def test_bins_mixed_signs(self):
    # Bin the integers 50 down to -49 (values on both sides of zero)
    # with the default bins() arguments.
    rows = [[i + 50] for i in range(0, -100, -1)]

    columns = (
        ('number', self.number_type),
    )

    binned = Table(rows, columns).bins('number')

    # (bin row index, expected [label, count])
    expected_bins = (
        (0, ['[-50 - -40)', 9]),
        (3, ['[-20 - -10)', 10]),
        (9, ['[40 - 50]', 11]),
    )

    for index, expected in expected_bins:
        self.assertSequenceEqual(binned.rows[index], expected)
def test_bins_negative(self):
    # Bin the integers 0 down to -99, passing 10, -100 and 0 as the
    # extra bins() arguments.
    rows = [[i] for i in range(0, -100, -1)]

    columns = (
        ('number', self.number_type),
    )

    binned = Table(rows, columns).bins('number', 10, -100, 0)

    # (bin row index, expected [label, count])
    expected_bins = (
        (0, ['[-100 - -90)', 9]),
        (3, ['[-70 - -60)', 10]),
        (9, ['[-10 - 0]', 11]),
    )

    for index, expected in expected_bins:
        self.assertSequenceEqual(binned.rows[index], expected)
def test_order_by(self):
    # order_by('two') must return a new table sorted on that column and
    # leave the source table's row order alone.
    original = Table(self.rows, self.columns)

    ordered = original.order_by('two')

    self.assertIsNot(ordered, original)
    self.assertEqual(len(ordered.rows), 3)

    rows_by_two = (
        (None, 2, u'👍'),
        (2, 3, 'b'),
        (1, 4, 'a'),
    )

    for position, expected in enumerate(rows_by_two):
        self.assertSequenceEqual(ordered.rows[position], expected)

    # Verify old table not changed
    # (its expected order is exactly the reverse of the sorted order)
    for position, expected in enumerate(reversed(rows_by_two)):
        self.assertSequenceEqual(original.rows[position], expected)
def test_create_variable_length_rows(self):
    # Rows shorter than the column count are expected to be padded with
    # None, both with explicit names/types and with none supplied.
    ragged_rows = (
        (1, 4, 'a'),
        (2,),
        (None, 2),
    )

    padded_rows = [
        (1, 4, 'a'),
        (2, None, None),
        (None, 2, None),
    ]

    explicit = Table(ragged_rows, self.column_names, self.column_types)
    inferred = Table(ragged_rows)

    self.assertColumnNames(explicit, self.column_names)
    self.assertColumnTypes(explicit, [Number, Number, Text])
    self.assertRows(explicit, padded_rows)

    self.assertColumnTypes(inferred, [Number, Number, Text])
    self.assertRows(inferred, padded_rows)
def test_bins_decimals(self):
    # Bin the decimals 0.00 .. 0.99 with the default bins() arguments;
    # check the output schema and spot-check three bins.
    hundred = Decimal('100')
    rows = [[Decimal(i) / hundred] for i in range(0, 100)]

    source = Table(rows, self.column_names, self.column_types)
    binned = source.bins('number')

    self.assertColumnNames(binned, ['number', 'Count'])
    self.assertColumnTypes(binned, [Text, Number])

    # (bin row index, expected [label, count])
    expected_bins = (
        (0, ['[0.0 - 0.1)', 10]),
        (3, ['[0.3 - 0.4)', 10]),
        (9, ['[0.9 - 1.0]', 10]),
    )

    for index, expected in expected_bins:
        self.assertSequenceEqual(binned.rows[index], expected)
def test_bins_mixed_signs(self):
    # Bin the integers 50 down to -49 (values on both sides of zero);
    # check the output schema and spot-check three bins.
    rows = [[i + 50] for i in range(0, -100, -1)]

    source = Table(rows, self.column_names, self.column_types)
    binned = source.bins('number')

    self.assertColumnNames(binned, ['number', 'Count'])
    self.assertColumnTypes(binned, [Text, Number])

    # (bin row index, expected [label, count])
    expected_bins = (
        (0, ['[-50 - -40)', 9]),
        (3, ['[-20 - -10)', 10]),
        (9, ['[40 - 50]', 11]),
    )

    for index, expected in expected_bins:
        self.assertSequenceEqual(binned.rows[index], expected)
def test_bins_negative(self):
    # Bin the integers 0 down to -99, passing 10, -100 and 0 as the
    # extra bins() arguments; check the schema and three bins.
    rows = [[i] for i in range(0, -100, -1)]

    source = Table(rows, self.column_names, self.column_types)
    binned = source.bins('number', 10, -100, 0)

    self.assertColumnNames(binned, ['number', 'Count'])
    self.assertColumnTypes(binned, [Text, Number])

    # (bin row index, expected [label, count])
    expected_bins = (
        (0, ['[-100 - -90)', 9]),
        (3, ['[-70 - -60)', 10]),
        (9, ['[-10 - 0]', 11]),
    )

    for index, expected in expected_bins:
        self.assertSequenceEqual(binned.rows[index], expected)
def setUp(self):
    # Shared fixture: two Decimal-valued number columns and a text
    # column. Rows two and four share the same numbers, and row three
    # has None in column 'one'.
    number = Number()
    text = Text()
    self.number_type = number
    self.text_type = text

    self.column_names = ['one', 'two', 'three']
    self.column_types = [number, number, text]

    self.rows = (
        (Decimal('1.1'), Decimal('2.19'), 'a'),
        (Decimal('2.7'), Decimal('3.42'), 'b'),
        (None, Decimal('4.1'), 'c'),
        (Decimal('2.7'), Decimal('3.42'), 'c'),
    )

    self.table = Table(self.rows, self.column_names, self.column_types)
def test_group_by_key_name(self):
    # Group on column 'one' with an explicit key_name; the TableSet
    # should report that name and keep None as a key of its own.
    source = Table(self.rows, self.columns)

    grouped = source.group_by('one', key_name='test')

    self.assertIsInstance(grouped, TableSet)
    self.assertEqual(grouped.key_name, 'test')
    self.assertIsInstance(grouped.key_type, Text)

    for key in ('a', 'b', None):
        self.assertIn(key, grouped.keys())

    self.assertSequenceEqual(grouped['a'].columns['one'], ('a', 'a'))
    self.assertSequenceEqual(grouped['b'].columns['one'], ('b',))
def test_create_variable_length_rows(self):
    # Rows shorter than the column count are expected to be padded with
    # None up to the full width.
    ragged_rows = (
        (1, 4, 'a'),
        (2,),
        (None, 2),
    )

    table = Table(ragged_rows, self.columns)

    self.assertEqual(len(table.rows), 3)
    self.assertEqual(len(table.columns), 3)

    padded_rows = (
        (1, 4, 'a'),
        (2, None, None),
        (None, 2, None),
    )

    for position, expected in enumerate(padded_rows):
        self.assertSequenceEqual(table.rows[position], expected)
def test_group_by_number(self):
    # Group on the number column 'two'; keys are expected to be Decimal
    # values and the key type Number.
    source = Table(self.rows, self.columns)

    grouped = source.group_by('two')

    self.assertIsInstance(grouped, TableSet)
    self.assertEqual(len(grouped), 2)
    self.assertEqual(grouped.key_name, 'two')
    self.assertIsInstance(grouped.key_type, Number)

    for key in (Decimal('2'), Decimal('3')):
        self.assertIn(key, grouped.keys())

    self.assertSequenceEqual(
        grouped[Decimal('2')].columns['one'],
        ('a', 'a')
    )
    self.assertSequenceEqual(
        grouped[Decimal('3')].columns['one'],
        (None, 'b')
    )
def test_changed_invalid_types(self):
    # Computing Change between two Boolean columns is expected to raise
    # DataTypeError.
    table = Table(
        ((False, True), (True, False)),
        ['before', 'after'],
        [Boolean(), Boolean()]
    )

    # The Change object is built inside the context manager so that an
    # error raised at construction time is caught as well.
    with self.assertRaises(DataTypeError):
        table.compute([('test', Change('before', 'after'))])
def test_denormalize_column_types(self):
    # Denormalize with no key column (None) while forcing the types of
    # the generated columns through column_types.
    source = Table(self.rows, self.column_names, self.column_types)

    wide = source.denormalize(
        None,
        'property',
        'value',
        column_types=[Text(), Number()]
    )

    # NB: value has been overwritten
    expected_rows = (('male', 24), )

    self.assertRows(wide, expected_rows)
    self.assertColumnNames(wide, ['gender', 'age'])
    self.assertColumnTypes(wide, [Text, Number])
def test_change_mixed_types(self):
    # Computing Change between a Number column and a Date column is
    # expected to raise DataTypeError.
    table = Table(
        (('1', '10/24/1978'), ('2', '11/13/1974')),
        ['number', 'date'],
        [Number(), Date()]
    )

    # The Change object is built inside the context manager so that an
    # error raised at construction time is caught as well.
    with self.assertRaises(DataTypeError):
        table.compute([('test', Change('number', 'date'))])
def test_to_json_make_dir(self):
    # Write the fixture table as JSON under 'newdir/' and compare the
    # parsed result with the reference file examples/test.json.
    # NOTE(review): the test never creates 'newdir' itself, so to_json
    # is presumably expected to create it -- confirm against to_json.
    table = Table(self.rows, self.column_names, self.column_types)

    try:
        table.to_json('newdir/test.json')

        with open('newdir/test.json') as f1:
            js1 = json.load(f1)

        with open('examples/test.json') as f2:
            js2 = json.load(f2)

        self.assertEqual(js1, js2)
    finally:
        # Clean up even when the write or the comparison fails, so a
        # failing run does not leave the file and directory behind. The
        # existence checks keep a failed write from masking the
        # original error with a missing-path error.
        if os.path.exists('newdir/test.json'):
            os.remove('newdir/test.json')

        if os.path.isdir('newdir/'):
            os.rmdir('newdir/')
def test_percentile_rank(self):
    # Compute PercentileRank over the integers 1..1000 and spot-check
    # the rank appended to selected rows.
    rows = [(value,) for value in range(1, 1001)]

    source = Table(rows, ['ints'], [self.number_type])
    ranked = source.compute([('percentiles', PercentileRank('ints'))])

    self.assertEqual(len(ranked.rows), 1000)
    self.assertEqual(len(ranked.columns), 2)

    # (row index, expected (value, percentile rank))
    spot_checks = (
        (0, (1, 0)),
        (50, (51, 5)),
        (499, (500, 49)),
        (500, (501, 50)),
        (998, (999, 99)),
        (999, (1000, 100)),
    )

    for index, expected in spot_checks:
        self.assertSequenceEqual(ranked.rows[index], expected)