def test_join_match_multiple(self):
    """
    A left row whose key matches several right rows produces one joined
    row per match.
    """
    left_table = Table(
        ((1, 4, 'a'), (2, 3, 'b')),
        self.left_column_names,
        self.column_types
    )
    right_table = Table(
        ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
        self.right_column_names,
        self.column_types
    )

    joined = left_table.join(right_table, 'one', 'five')

    # The join must build a new table rather than return either input.
    self.assertIsNot(joined, left_table)
    self.assertIsNot(joined, right_table)

    expected_names = ['one', 'two', 'three', 'four', 'six']
    expected_types = [Number, Number, Text, Number, Text]
    expected_rows = [
        (1, 4, 'a', 1, 'a'),
        (2, 3, 'b', 1, 'a'),
        (2, 3, 'b', 2, 'b')
    ]

    self.assertColumnNames(joined, expected_names)
    self.assertColumnTypes(joined, expected_types)
    self.assertRows(joined, expected_rows)
def test_join_match_multiple(self):
    """
    A left row whose key matches several right rows produces one joined
    row per match.
    """
    left_table = Table(
        ((1, 4, 'a'), (2, 3, 'b')),
        self.left_column_names,
        self.column_types
    )
    right_table = Table(
        ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
        self.right_column_names,
        self.column_types
    )

    joined = left_table.join(right_table, 'one', 'five')

    expected_names = ('one', 'two', 'three', 'four', 'six')
    expected_types = (Number, Number, Text, Number, Text)
    expected_rows = (
        (1, 4, 'a', 1, 'a'),
        (2, 3, 'b', 1, 'a'),
        (2, 3, 'b', 2, 'b')
    )

    self.assertEqual(len(joined.rows), 3)
    self.assertEqual(len(joined.columns), 5)

    # Names first, then types, then row contents, each in column/row order.
    for position, name in enumerate(expected_names):
        self.assertEqual(joined.columns[position].name, name)

    for position, data_type in enumerate(expected_types):
        self.assertIsInstance(joined.columns[position].data_type, data_type)

    for position, row in enumerate(expected_rows):
        self.assertSequenceEqual(joined.rows[position], row)
def test_join_match_multiple(self):
    """
    A left row whose key matches several right rows produces one joined
    row per match.
    """
    left_table = Table(((1, 4, 'a'), (2, 3, 'b')), self.left_columns)
    right_table = Table(
        ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
        self.right_columns
    )

    joined = left_table.join(right_table, 'one', 'five')

    expected_names = ('one', 'two', 'three', 'four', 'six')
    expected_types = (Number, Number, Text, Number, Text)
    expected_rows = (
        (1, 4, 'a', 1, 'a'),
        (2, 3, 'b', 1, 'a'),
        (2, 3, 'b', 2, 'b')
    )

    self.assertEqual(len(joined.rows), 3)
    self.assertEqual(len(joined.columns), 5)

    # Names first, then types, then row contents, each in column/row order.
    for position, name in enumerate(expected_names):
        self.assertEqual(joined.columns[position].name, name)

    for position, data_type in enumerate(expected_types):
        self.assertIsInstance(joined.columns[position].data_type, data_type)

    for position, row in enumerate(expected_rows):
        self.assertSequenceEqual(joined.rows[position], row)
def test_full_outer(self):
    """
    A full outer join keeps unmatched rows from both tables; the expected
    rows pad the missing side with ``None``.
    """
    left_table = Table(
        ((1, 4, 'a'), (2, 3, 'b'), (3, 2, 'c')),
        self.left_column_names,
        self.column_types
    )
    right_table = Table(
        ((1, 4, 'a'), (2, 3, 'b'), (4, 2, 'c')),
        self.right_column_names,
        self.column_types
    )

    joined = left_table.join(right_table, 'one', 'four', full_outer=True)

    # The join must build a new table rather than return either input.
    self.assertIsNot(joined, left_table)
    self.assertIsNot(joined, right_table)

    expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
    expected_types = [Number, Number, Text, Number, Number, Text]
    expected_rows = [
        (1, 4, 'a', 1, 4, 'a'),
        (2, 3, 'b', 2, 3, 'b'),
        (3, 2, 'c', None, None, None),
        (None, None, None, 4, 2, 'c')
    ]

    self.assertColumnNames(joined, expected_names)
    self.assertColumnTypes(joined, expected_types)
    self.assertRows(joined, expected_rows)
def test_join_with_row_names(self):
    """
    Row names taken from the left table's 'three' column are expected on
    the joined table.
    """
    named_left = Table(
        self.left_rows,
        self.left_column_names,
        self.column_types,
        row_names='three'
    )

    joined = named_left.join(self.right, 'one', 'four')

    self.assertRowNames(joined, ('a', 'b', 'c'))
def test_join_by_row_number_short_left(self):
    """
    Full outer join called without any key arguments, where the left table
    has one more row than the right.

    The expected rows pair the tables up row by row and pad the extra left
    row with ``None`` on the right-hand side.
    """
    extended_rows = self.left_rows + ((7, 9, 'z'),)
    longer_left = Table(
        extended_rows,
        self.left_column_names,
        self.column_types
    )

    joined = longer_left.join(self.right, full_outer=True)

    # The join must build a new table rather than return either input.
    self.assertIsNot(joined, longer_left)
    self.assertIsNot(joined, self.right)

    expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
    expected_types = [Number, Number, Text, Number, Number, Text]
    expected_rows = [
        (1, 4, 'a', 1, 4, 'a'),
        (2, 3, 'b', 2, 3, 'b'),
        (None, 2, 'c', None, 2, 'c'),
        (7, 9, 'z', None, None, None)
    ]

    self.assertColumnNames(joined, expected_names)
    self.assertColumnTypes(joined, expected_types)
    self.assertRows(joined, expected_rows)
def test_join_with_row_names(self):
    """
    Row names taken from the left table's 'three' column are expected on
    the joined table, and joined rows can be looked up by those names.
    """
    named_left = Table(self.left_rows, self.left_columns, row_names='three')

    joined = named_left.join(self.right, 'one', 'four')

    first_row_by_name = joined.rows['a']
    self.assertSequenceEqual(first_row_by_name, (1, 4, 'a', 4, 'a'))
    self.assertSequenceEqual(joined.row_names, ('a', 'b', 'c'))
class TestTableJoin(unittest.TestCase):
    """
    Tests for ``Table.join``: default and inner joins keyed by column name,
    by a shared column name, and by key functions.
    """

    def setUp(self):
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )
        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_columns = (
            ('one', self.number_type),
            ('two', self.number_type),
            ('three', self.text_type)
        )
        self.right_columns = (
            ('four', self.number_type),
            ('five', self.number_type),
            ('six', self.text_type)
        )

        self.left = Table(self.left_rows, self.left_columns)
        self.right = Table(self.right_rows, self.right_columns)

    def _check_columns(self, table, names, types):
        """
        Assert the column count, then each column name, then each data type.
        """
        self.assertEqual(len(table.columns), len(names))

        for position, name in enumerate(names):
            self.assertEqual(table.columns[position].name, name)

        for position, data_type in enumerate(types):
            self.assertIsInstance(table.columns[position].data_type, data_type)

    def _check_rows(self, table, rows):
        """
        Assert that the leading rows of ``table`` equal ``rows``, in order.
        """
        for position, row in enumerate(rows):
            self.assertSequenceEqual(table.rows[position], row)

    def test_join(self):
        """
        Joining on 'one' == 'four' leaves the right key column out of the
        result.
        """
        joined = self.left.join(self.right, 'one', 'four')

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'five', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ))

    def test_join_match_multiple(self):
        """
        A left row whose key matches several right rows produces one joined
        row per match.
        """
        left_table = Table(((1, 4, 'a'), (2, 3, 'b')), self.left_columns)
        right_table = Table(
            ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
            self.right_columns
        )

        joined = left_table.join(right_table, 'one', 'five')

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ))

    def test_join2(self):
        """
        Joining on 'one' == 'five' keeps every left row; the expected rows
        carry ``None`` where no right row matched.
        """
        joined = self.left.join(self.right, 'one', 'five')

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ))

    def test_join_same_column_name(self):
        """
        A single key name is enough when both tables have a column 'one'.
        """
        renamed_columns = (
            ('four', self.number_type),
            ('one', self.number_type),
            ('six', self.text_type)
        )
        renamed_right = Table(self.right_rows, renamed_columns)

        joined = self.left.join(renamed_right, 'one')

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ))

    def test_join_func(self):
        """
        Keys computed by functions keep all six columns in the result.
        """
        joined = self.left.join(
            self.right,
            lambda row: '%i%s' % (row['two'], row['three']),
            lambda row: '%i%s' % (row['five'], row['six'])
        )

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'five', 'six'),
            (Number, Number, Text, Number, Number, Text)
        )
        self._check_rows(joined, (
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ))

    def test_join_column_does_not_exist(self):
        """
        Naming a right key column that does not exist raises ``KeyError``.
        """
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        """
        Inner join on 'one' == 'four' is expected to return all three rows.
        """
        joined = self.left.join(self.right, 'one', 'four', inner=True)

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'five', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ))

    def test_inner_join2(self):
        """
        Inner join on 'one' == 'five' is expected to return a single row.
        """
        joined = self.left.join(self.right, 'one', 'five', inner=True)

        self.assertEqual(len(joined.rows), 1)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (2, 3, 'b', None, 'c'),
        ))

    def test_inner_join_same_column_name(self):
        """
        Inner join keyed by a single name shared by both tables.
        """
        renamed_columns = (
            ('four', self.number_type),
            ('one', self.number_type),
            ('six', self.text_type)
        )
        renamed_right = Table(self.right_rows, renamed_columns)

        joined = self.left.join(renamed_right, 'one', inner=True)

        self.assertEqual(len(joined.rows), 1)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'six'),
            (Number, Number, Text, Number, Text)
        )
        self._check_rows(joined, (
            (2, 3, 'b', None, 'c'),
        ))

    def test_inner_join_func(self):
        """
        Inner join keyed by functions; only row count, column names and
        column types are checked.
        """
        joined = self.left.join(
            self.right,
            lambda row: '%i%s' % (row['two'], row['three']),
            lambda row: '%i%s' % (row['five'], row['six']),
            inner=True
        )

        self.assertEqual(len(joined.rows), 3)
        self._check_columns(
            joined,
            ('one', 'two', 'three', 'four', 'five', 'six'),
            (Number, Number, Text, Number, Number, Text)
        )

    def test_join_with_row_names(self):
        """
        Row names taken from the left table's 'three' column are expected on
        the joined table, and joined rows can be looked up by those names.
        """
        named_left = Table(self.left_rows, self.left_columns, row_names='three')

        joined = named_left.join(self.right, 'one', 'four')

        self.assertSequenceEqual(joined.rows['a'], (1, 4, 'a', 4, 'a'))
        self.assertSequenceEqual(joined.row_names, ('a', 'b', 'c'))
class TestJoin(AgateTestCase):
    """
    Tests for ``Table.join``: default and inner joins keyed by column name,
    by several columns, or by key functions, plus the ``require_match`` and
    ``columns`` keyword arguments.
    """

    def setUp(self):
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def test_join(self):
        """
        Joining on 'one' == 'four' leaves the right key column out of the
        result.
        """
        new_table = self.left.join(self.right, 'one', 'four')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_match_multiple(self):
        """
        A left row whose key matches several right rows produces one joined
        row per match.
        """
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b')
        )

        right_rows = (
            (1, 1, 'a'),
            (1, 2, 'a'),
            (2, 2, 'b')
        )

        left = Table(left_rows, self.left_column_names, self.column_types)
        right = Table(right_rows, self.right_column_names, self.column_types)
        new_table = left.join(right, 'one', 'five')

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ])

    def test_join2(self):
        """
        Joining on 'one' == 'five' keeps every left row; the expected rows
        carry ``None`` where no right row matched.
        """
        new_table = self.left.join(self.right, 'one', 'five')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_same_column_name(self):
        """
        A single key name is enough when both tables have a column 'one'.
        """
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_multiple_columns(self):
        """
        Joining on two key columns leaves both right key columns out of the
        result.
        """
        new_table = self.left.join(
            self.right,
            ['two', 'three'],
            ['five', 'six']
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number])
        self.assertRows(new_table, [
            (1, 4, 'a', 1),
            (2, 3, 'b', 2),
            (None, 2, 'c', None)
        ])

    def test_join_func(self):
        """
        Keys computed by functions keep all six columns in the result.
        """
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six'])
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_column_does_not_exist(self):
        """
        Naming a right key column that does not exist raises ``KeyError``.
        """
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        """
        Inner join on 'one' == 'four'.
        """
        new_table = self.left.join(self.right, 'one', 'four', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_inner_join2(self):
        """
        Inner join on 'one' == 'five'.
        """
        new_table = self.left.join(self.right, 'one', 'five', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_same_column_name(self):
        """
        Inner join keyed by a single name shared by both tables.
        """
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_func(self):
        """
        Inner join keyed by functions keeps all six columns in the result.
        """
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six']),
            inner=True
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a')
        ])

    def test_join_with_row_names(self):
        """
        Row names taken from the left table's 'three' column are expected on
        the joined table.
        """
        left = Table(self.left_rows, self.left_column_names, self.column_types, row_names='three')
        new_table = left.join(self.right, 'one', 'four')

        self.assertRowNames(new_table, ('a', 'b', 'c'))

    def test_join_require_match(self):
        """
        ``require_match=True`` raises ``ValueError`` for the 'one'/'five'
        join and succeeds for the 'one'/'four' join.
        """
        # test_join2 expects None-filled rows for these keys without the flag.
        with self.assertRaises(ValueError):
            self.left.join(self.right, 'one', 'five', require_match=True)

        # Same keys as test_join: must not raise and should give the same rows.
        new_table = self.left.join(self.right, 'one', 'four', require_match=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_columns_kwarg(self):
        """
        ``columns=['six']`` adds only that right column to the result.
        """
        new_table = self.left.join(self.right, 'one', 'four', columns=['six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 'a'),
            (2, 3, 'b', 'b'),
            (None, 2, 'c', 'c')
        ])

    def test_join_columns_kwarg_right_key(self):
        """
        Listing the right key in ``columns`` keeps it in the result.
        """
        new_table = self.left.join(self.right, 'one', 'four', columns=['four', 'six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 2, 'b'),
            (None, 2, 'c', None, 'c')
        ])
class TestJoin(AgateTestCase):
    """
    Tests for ``Table.join``: default, inner and full outer joins keyed by
    column name, column index, several columns, key functions or row
    position, plus the ``require_match`` and ``columns`` keyword arguments.
    """

    def setUp(self):
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def test_join(self):
        """
        Joining on 'one' == 'four' leaves the right key column out of the
        result.
        """
        new_table = self.left.join(self.right, 'one', 'four')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_column_indicies(self):
        """
        Keys given as column indices (0, 0) are expected to give the same
        result as joining on 'one' == 'four' by name.
        """
        new_table = self.left.join(self.right, 0, 0)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_match_multiple(self):
        """
        A left row whose key matches several right rows produces one joined
        row per match.
        """
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b')
        )

        right_rows = (
            (1, 1, 'a'),
            (1, 2, 'a'),
            (2, 2, 'b')
        )

        left = Table(left_rows, self.left_column_names, self.column_types)
        right = Table(right_rows, self.right_column_names, self.column_types)
        new_table = left.join(right, 'one', 'five')

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ])

    def test_join2(self):
        """
        Joining on 'one' == 'five' keeps every left row; the expected rows
        carry ``None`` where no right row matched.
        """
        new_table = self.left.join(self.right, 'one', 'five')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_same_column_name(self):
        """
        A single key name is enough when both tables have a column 'one'.
        """
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_multiple_columns(self):
        """
        Joining on two key columns leaves both right key columns out of the
        result.
        """
        new_table = self.left.join(
            self.right,
            ['two', 'three'],
            ['five', 'six']
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number])
        self.assertRows(new_table, [
            (1, 4, 'a', 1),
            (2, 3, 'b', 2),
            (None, 2, 'c', None)
        ])

    def test_join_func(self):
        """
        Keys computed by functions keep all six columns in the result.
        """
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six'])
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_column_does_not_exist(self):
        """
        Naming a right key column that does not exist raises ``KeyError``.
        """
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        """
        Inner join on 'one' == 'four'.
        """
        new_table = self.left.join(self.right, 'one', 'four', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_inner_join2(self):
        """
        Inner join on 'one' == 'five'.
        """
        new_table = self.left.join(self.right, 'one', 'five', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_same_column_name(self):
        """
        Inner join keyed by a single name shared by both tables.
        """
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_func(self):
        """
        Inner join keyed by functions keeps all six columns in the result.
        """
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six']),
            inner=True
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a')
        ])

    def test_join_with_row_names(self):
        """
        Row names taken from the left table's 'three' column are expected on
        the joined table.
        """
        left = Table(self.left_rows, self.left_column_names, self.column_types, row_names='three')
        new_table = left.join(self.right, 'one', 'four')

        self.assertRowNames(new_table, ('a', 'b', 'c'))

    def test_join_require_match(self):
        """
        ``require_match=True`` raises ``ValueError`` for the 'one'/'five'
        join and succeeds for the 'one'/'four' join.
        """
        # test_join2 expects None-filled rows for these keys without the flag.
        with self.assertRaises(ValueError):
            self.left.join(self.right, 'one', 'five', require_match=True)

        # Same keys as test_join: must not raise and should give the same rows.
        new_table = self.left.join(self.right, 'one', 'four', require_match=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_columns_kwarg(self):
        """
        ``columns=['six']`` adds only that right column to the result.
        """
        new_table = self.left.join(self.right, 'one', 'four', columns=['six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 'a'),
            (2, 3, 'b', 'b'),
            (None, 2, 'c', 'c')
        ])

    def test_join_columns_kwarg_right_key(self):
        """
        Listing the right key in ``columns`` keeps it in the result.
        """
        new_table = self.left.join(self.right, 'one', 'four', columns=['four', 'six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 2, 'b'),
            (None, 2, 'c', None, 'c')
        ])

    def test_join_rows_are_tuples(self):
        """
        The values of a joined row are exposed as a ``tuple``.
        """
        new_table = self.left.join(self.right, 'one', 'four', columns=['four', 'six'])

        self.assertIsInstance(new_table.rows[0].values(), tuple)

    def test_full_outer(self):
        """
        A full outer join keeps unmatched rows from both tables; the expected
        rows pad the missing side with ``None``.
        """
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (3, 2, 'c')
        )

        right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (4, 2, 'c')
        )

        left = Table(left_rows, self.left_column_names, self.column_types)
        right = Table(right_rows, self.right_column_names, self.column_types)

        new_table = left.join(right, 'one', 'four', full_outer=True)

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (3, 2, 'c', None, None, None),
            (None, None, None, 4, 2, 'c')
        ])

    def test_join_by_row_number(self):
        """
        Full outer join called without key arguments; the expected rows pair
        the two tables up row by row.
        """
        new_table = self.left.join(self.right, full_outer=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_by_row_number_short_right(self):
        """
        Keyless full outer join where the right table has one more row than
        the left; the extra right row is expected with ``None`` on the left.
        """
        right_rows = self.right_rows + ((7, 9, 'z'),)
        right = Table(right_rows, self.right_column_names, self.column_types)

        new_table = self.left.join(right, full_outer=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (None, None, None, 7, 9, 'z')
        ])

    def test_join_by_row_number_short_left(self):
        """
        Keyless full outer join where the left table has one more row than
        the right; the extra left row is expected with ``None`` on the right.
        """
        left_rows = self.left_rows + ((7, 9, 'z'),)
        left = Table(left_rows, self.left_column_names, self.column_types)

        new_table = left.join(self.right, full_outer=True)

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (7, 9, 'z', None, None, None)
        ])
def test_join_with_row_names(self):
    """
    Row names taken from the left table's 'three' column are expected on
    the joined table, and joined rows can be looked up by those names.
    """
    named_left = Table(
        self.left_rows,
        self.left_column_names,
        self.column_types,
        row_names='three'
    )

    joined = named_left.join(self.right, 'one', 'four')

    first_row_by_name = joined.rows['a']
    self.assertSequenceEqual(first_row_by_name, (1, 4, 'a', 4, 'a'))
    self.assertSequenceEqual(joined.row_names, ('a', 'b', 'c'))
class TestTableJoin(unittest.TestCase):
    # Tests for Table.join using two three-row fixtures that hold the same
    # values under different column names ('one'..'three' vs 'four'..'six').

    def setUp(self):
        # Both fixtures carry identical values; only the column names differ.
        self.left_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))
        self.right_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']

        self.number_type = Number()
        self.text_type = Text()
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def _assert_joined(self, table, row_count, names, types, rows=()):
        # Shared checks, run in a fixed order: row count, column count,
        # column names, column data types, then the content of each row
        # listed in `rows` (by position). Pass no `rows` to skip the content
        # check.
        self.assertEqual(len(table.rows), row_count)
        self.assertEqual(len(table.columns), len(names))

        for position, name in enumerate(names):
            self.assertEqual(table.columns[position].name, name)

        for position, data_type in enumerate(types):
            self.assertIsInstance(table.columns[position].data_type, data_type)

        for position, expected in enumerate(rows):
            self.assertSequenceEqual(table.rows[position], expected)

    def test_join(self):
        # Join on one == four; the expected columns omit the right key 'four'.
        joined = self.left.join(self.right, 'one', 'four')

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'five', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', 4, 'a'),
                (2, 3, 'b', 3, 'b'),
                (None, 2, 'c', 2, 'c'),
            ]
        )

    def test_join_match_multiple(self):
        # The left value 2 matches two right rows, so that left row is
        # expected twice in the output.
        left = Table(
            ((1, 4, 'a'), (2, 3, 'b')),
            self.left_column_names,
            self.column_types
        )
        right = Table(
            ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
            self.right_column_names,
            self.column_types
        )

        joined = left.join(right, 'one', 'five')

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', 1, 'a'),
                (2, 3, 'b', 1, 'a'),
                (2, 3, 'b', 2, 'b'),
            ]
        )

    def test_join2(self):
        # Join on one == five; only the left row with one == 2 has a match,
        # the other rows are expected to be padded with None.
        joined = self.left.join(self.right, 'one', 'five')

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', None, None),
                (2, 3, 'b', None, 'c'),
                (None, 2, 'c', None, None),
            ]
        )

    def test_join_same_column_name(self):
        # Only the left key is given; the right table has a column with the
        # same name ('one').
        renamed_right = Table(
            self.right_rows,
            ['four', 'one', 'six'],
            self.column_types
        )

        joined = self.left.join(renamed_right, 'one')

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', None, None),
                (2, 3, 'b', None, 'c'),
                (None, 2, 'c', None, None),
            ]
        )

    def test_join_func(self):
        # Keys are computed by callables; all six columns are expected in
        # the output.
        def left_key(row):
            return '%i%s' % (row['two'], row['three'])

        def right_key(row):
            return '%i%s' % (row['five'], row['six'])

        joined = self.left.join(self.right, left_key, right_key)

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'five', 'six'],
            [Number, Number, Text, Number, Number, Text],
            [
                (1, 4, 'a', 1, 4, 'a'),
                (2, 3, 'b', 2, 3, 'b'),
                (None, 2, 'c', None, 2, 'c'),
            ]
        )

    def test_join_column_does_not_exist(self):
        # 'seven' is not a column of the right table.
        self.assertRaises(KeyError, self.left.join, self.right, 'one', 'seven')

    def test_inner_join(self):
        # Every left row has a match on one == four, so all three rows are
        # expected.
        joined = self.left.join(self.right, 'one', 'four', inner=True)

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'five', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', 4, 'a'),
                (2, 3, 'b', 3, 'b'),
                (None, 2, 'c', 2, 'c'),
            ]
        )

    def test_inner_join2(self):
        # Only one left row matches on one == five.
        joined = self.left.join(self.right, 'one', 'five', inner=True)

        self._assert_joined(
            joined,
            1,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (2, 3, 'b', None, 'c'),
            ]
        )

    def test_inner_join_same_column_name(self):
        # Inner join with only the left key given and a same-named right
        # column.
        renamed_right = Table(
            self.right_rows,
            ['four', 'one', 'six'],
            self.column_types
        )

        joined = self.left.join(renamed_right, 'one', inner=True)

        self._assert_joined(
            joined,
            1,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (2, 3, 'b', None, 'c'),
            ]
        )

    def test_inner_join_func(self):
        # Inner join keyed by callables; only the shape of the result is
        # checked, not the row contents.
        def left_key(row):
            return '%i%s' % (row['two'], row['three'])

        def right_key(row):
            return '%i%s' % (row['five'], row['six'])

        joined = self.left.join(self.right, left_key, right_key, inner=True)

        self._assert_joined(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'five', 'six'],
            [Number, Number, Text, Number, Number, Text]
        )

    def test_join_with_row_names(self):
        # The left table is keyed by its 'three' column; the joined table is
        # expected to expose the same row names.
        named_left = Table(
            self.left_rows,
            self.left_column_names,
            self.column_types,
            row_names='three'
        )
        joined = named_left.join(self.right, 'one', 'four')

        self.assertSequenceEqual(joined.rows['a'], (1, 4, 'a', 4, 'a'))
        self.assertSequenceEqual(joined.row_names, ('a', 'b', 'c'))
class VoteWatcher(object):
    """
    Compare election machine answers with the votes cast in parliament.

    Building an instance loads the election machine answers, downloads the
    voting record and stores the results as `promise_table`, `vote_table`,
    `table_all` and `table_diff`.

    Example usage:

        # Init vote watcher
        x = VoteWatcher(128, "https://www.eduskunta.fi/FI/vaski/Sivut/Aanestys.aspx?aanestysnro=19&istuntonro=82&vuosi=2015")

        # Write a table with all MPs
        x.table_all.to_csv("table.csv")

        # Write a table with only the people that changed opinions
        x.table_diff.to_csv("table_change.csv")
    """

    def __init__(self, question_id, vote_url, reverse=False):
        """
        Build the promise, vote and comparison tables.

        - question_id : an id of a question as defined in questions.py
        - vote_url : a url to the voting record on eduskunta.fi, i.e.
            https://www.eduskunta.fi/FI/vaski/Sivut/Aanestys.aspx?aanestysnro=19&istuntonro=82&vuosi=2015
        - reverse : Set to True if "agree" in the election machine equals
            "no" in the parliament vote.

        Raises ValueError if question_id is not a key of QUESTIONS.
        """
        self.vote_url = vote_url
        # NOTE(review): `reverse` is stored here but not read by any method
        # of this class -- confirm whether the comparison should use it.
        self.reverse = reverse
        try:
            self.question = QUESTIONS[question_id]
        except KeyError:
            raise ValueError("No such question id")

        self._get_promise_table()
        self._get_vote_table()
        self._compare_answers()

    def _get_promise_table(self):
        """
        Get the answers from the voting advice application.

        Reads "yle-vaalikone-2015.csv", keeps the rows whose "valittu" value
        is 1, selects the "nimi", "puolue" and question columns, and stores
        the result of rename_column(..., "promise") as `self.promise_table`.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Get election machine data (YLE)")
        table = Table.from_csv("yle-vaalikone-2015.csv")\
            .where(lambda row: row["valittu"] == 1)\
            .select(["nimi", "puolue", self.question])
        self.promise_table = rename_column(table, self.question, "promise")

    def _get_vote_table(self):
        """
        Get the votes of every MP.

        Downloads `self.vote_url`, parses the table inside the second
        `li.expand` element and stores one [name, party, vote] row per table
        row as `self.vote_table`.

        Raises requests.HTTPError if the server answers with an error status.
        """
        print("Get votes from eduskunta.fi")
        resp = requests.get(self.vote_url)
        # Fail here on an HTTP error status instead of further down while
        # parsing an error page.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        # TODO: Make a better selector
        trs = soup.find_all("li", {"class": "expand"})[1].find("table").find_all("tr")

        _rows = []
        for row in trs:
            cells = row.find_all("td")
            if len(cells) < 2:
                # Not a name/vote row (e.g. a header row without <td> cells).
                continue
            # The first cell holds "<name> / <party>".
            name_cell = cells[0].text.split("/")
            name = name_cell[0].strip()
            party = name_cell[1].strip()
            vote = cells[1].text
            _rows.append([name, party, vote])

        self.vote_table = Table(_rows, column_names=["name", "party", "vote"])

    def _compare_answers(self):
        """
        Check if the answers in the voting machine correspond with the vote.

        Joins the vote table with the promise table on "name" == "nimi",
        adds a "comparison" column computed by AnswerComparison and stores
        the full table as `self.table_all`. The rows whose comparison equals
        "different opinion" are stored as `self.table_diff`.
        """
        print("Compare promises and votes")
        table = self.vote_table.join(self.promise_table, "name", right_key="nimi")
        self.table_all = table.compute([
            ("comparison", AnswerComparison())
        ])
        self.table_diff = self.table_all\
            .where(lambda row: row['comparison'] == "different opinion")
        print("Found %s MP's that didn't vote according to their promises." % len(self.table_diff.rows))