示例#1
0
文件: test_join.py 项目: 01-/agate
    def test_join_match_multiple(self):
        """A left row is emitted once for every right row sharing its key.

        The left 'one' column is joined against the right 'five' column:
        key 1 appears once in 'five' and key 2 appears twice, so the
        expected result has three rows.
        """
        left = Table(
            ((1, 4, 'a'), (2, 3, 'b')),
            self.left_column_names,
            self.column_types
        )
        right = Table(
            ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
            self.right_column_names,
            self.column_types
        )

        joined = left.join(right, 'one', 'five')

        # The join must hand back a new table, not either of its inputs.
        for source in (left, right):
            self.assertIsNot(joined, source)

        # The right-hand key column ('five') is absent from the result.
        expected_names = ['one', 'two', 'three', 'four', 'six']
        expected_types = [Number, Number, Text, Number, Text]
        expected_rows = [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ]

        self.assertColumnNames(joined, expected_names)
        self.assertColumnTypes(joined, expected_types)
        self.assertRows(joined, expected_rows)
示例#2
0
    def test_join_match_multiple(self):
        """A left row is emitted once for every right row sharing its key.

        The left 'one' column is joined against the right 'five' column:
        key 1 appears once in 'five' and key 2 appears twice, so three
        rows are expected.
        """
        left_data = ((1, 4, 'a'), (2, 3, 'b'))
        right_data = ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b'))

        left = Table(left_data, self.left_column_names, self.column_types)
        right = Table(right_data, self.right_column_names, self.column_types)
        joined = left.join(right, 'one', 'five')

        self.assertEqual(len(joined.rows), 3)
        self.assertEqual(len(joined.columns), 5)

        # The right-hand key column ('five') is absent from the result.
        expected_names = ('one', 'two', 'three', 'four', 'six')
        for position, name in enumerate(expected_names):
            self.assertEqual(joined.columns[position].name, name)

        expected_types = (Number, Number, Text, Number, Text)
        for position, data_type in enumerate(expected_types):
            self.assertIsInstance(joined.columns[position].data_type, data_type)

        expected_rows = (
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b'),
        )
        for position, row in enumerate(expected_rows):
            self.assertSequenceEqual(joined.rows[position], row)
示例#3
0
    def test_join_match_multiple(self):
        """A left row is emitted once for every right row sharing its key.

        The left 'one' column is joined against the right 'five' column:
        key 1 appears once in 'five' and key 2 appears twice, so three
        rows are expected.
        """
        left_data = ((1, 4, 'a'), (2, 3, 'b'))
        right_data = ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b'))

        left = Table(left_data, self.left_columns)
        right = Table(right_data, self.right_columns)
        joined = left.join(right, 'one', 'five')

        self.assertEqual(len(joined.rows), 3)
        self.assertEqual(len(joined.columns), 5)

        # The right-hand key column ('five') is absent from the result.
        expected_names = ('one', 'two', 'three', 'four', 'six')
        for position, name in enumerate(expected_names):
            self.assertEqual(joined.columns[position].name, name)

        expected_types = (Number, Number, Text, Number, Text)
        for position, data_type in enumerate(expected_types):
            self.assertIsInstance(joined.columns[position].data_type, data_type)

        expected_rows = (
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b'),
        )
        for position, row in enumerate(expected_rows):
            self.assertSequenceEqual(joined.rows[position], row)
示例#4
0
    def test_full_outer(self):
        """A full outer join keeps unmatched rows from both tables.

        Keys 1 and 2 exist on both sides; key 3 exists only on the left
        and key 4 only on the right. Each unmatched row is expected to be
        padded with ``None`` for the other table's columns.
        """
        left = Table(
            ((1, 4, 'a'), (2, 3, 'b'), (3, 2, 'c')),
            self.left_column_names,
            self.column_types
        )
        right = Table(
            ((1, 4, 'a'), (2, 3, 'b'), (4, 2, 'c')),
            self.right_column_names,
            self.column_types
        )

        joined = left.join(right, 'one', 'four', full_outer=True)

        # The join must hand back a new table, not either of its inputs.
        for source in (left, right):
            self.assertIsNot(joined, source)

        # All six columns are expected, including the right key 'four'.
        expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
        expected_types = [Number, Number, Text, Number, Number, Text]
        expected_rows = [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (3, 2, 'c', None, None, None),
            (None, None, None, 4, 2, 'c')
        ]

        self.assertColumnNames(joined, expected_names)
        self.assertColumnTypes(joined, expected_types)
        self.assertRows(joined, expected_rows)
示例#5
0
    def test_full_outer(self):
        """A full outer join keeps unmatched rows from both tables.

        Keys 1 and 2 exist on both sides; key 3 exists only on the left
        and key 4 only on the right. Each unmatched row is expected to be
        padded with ``None`` for the other table's columns.
        """
        left = Table(
            ((1, 4, 'a'), (2, 3, 'b'), (3, 2, 'c')),
            self.left_column_names,
            self.column_types
        )
        right = Table(
            ((1, 4, 'a'), (2, 3, 'b'), (4, 2, 'c')),
            self.right_column_names,
            self.column_types
        )

        joined = left.join(right, 'one', 'four', full_outer=True)

        # The join must hand back a new table, not either of its inputs.
        for source in (left, right):
            self.assertIsNot(joined, source)

        # All six columns are expected, including the right key 'four'.
        expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
        expected_types = [Number, Number, Text, Number, Number, Text]
        expected_rows = [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (3, 2, 'c', None, None, None),
            (None, None, None, 4, 2, 'c')
        ]

        self.assertColumnNames(joined, expected_names)
        self.assertColumnTypes(joined, expected_types)
        self.assertRows(joined, expected_rows)
示例#6
0
    def test_join_match_multiple(self):
        """A left row is emitted once for every right row sharing its key.

        The left 'one' column is joined against the right 'five' column:
        key 1 appears once in 'five' and key 2 appears twice, so the
        expected result has three rows.
        """
        left = Table(
            ((1, 4, 'a'), (2, 3, 'b')),
            self.left_column_names,
            self.column_types
        )
        right = Table(
            ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
            self.right_column_names,
            self.column_types
        )

        joined = left.join(right, 'one', 'five')

        # The join must hand back a new table, not either of its inputs.
        for source in (left, right):
            self.assertIsNot(joined, source)

        # The right-hand key column ('five') is absent from the result.
        expected_names = ['one', 'two', 'three', 'four', 'six']
        expected_types = [Number, Number, Text, Number, Text]
        expected_rows = [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ]

        self.assertColumnNames(joined, expected_names)
        self.assertColumnTypes(joined, expected_types)
        self.assertRows(joined, expected_rows)
示例#7
0
    def test_join_with_row_names(self):
        """Row names set on the left table are expected on the joined table.

        The left table is built with ``row_names='three'``; after joining
        'one' against 'four' the result must expose ('a', 'b', 'c') as its
        row names.
        """
        named_left = Table(
            self.left_rows, self.left_column_names, self.column_types, row_names='three'
        )

        joined = named_left.join(self.right, 'one', 'four')

        expected_row_names = ('a', 'b', 'c')
        self.assertRowNames(joined, expected_row_names)
示例#8
0
    def test_join_by_row_number_short_left(self):
        """Join without key arguments, with one extra row on the left.

        No left or right key is passed, only ``full_outer=True``. The
        expected rows pair the two tables position by position, and the
        appended left row (7, 9, 'z') is padded with ``None`` on the right.
        """
        # NOTE(review): the first three expected rows assume the fixture
        # rows from setUp, which is not visible in this snippet — confirm.
        extended_rows = self.left_rows + ((7, 9, 'z'),)
        left = Table(extended_rows, self.left_column_names, self.column_types)

        joined = left.join(self.right, full_outer=True)

        # The join must hand back a new table, not either of its inputs.
        for source in (left, self.right):
            self.assertIsNot(joined, source)

        expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
        expected_types = [Number, Number, Text, Number, Number, Text]
        expected_rows = [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (7, 9, 'z', None, None, None)
        ]

        self.assertColumnNames(joined, expected_names)
        self.assertColumnTypes(joined, expected_types)
        self.assertRows(joined, expected_rows)
示例#9
0
    def test_join_by_row_number_short_left(self):
        """Join without key arguments, with one extra row on the left.

        No left or right key is passed, only ``full_outer=True``. The
        expected rows pair the two tables position by position, and the
        appended left row (7, 9, 'z') is padded with ``None`` on the right.
        """
        # NOTE(review): the first three expected rows assume the fixture
        # rows from setUp, which is not visible in this snippet — confirm.
        extended_rows = self.left_rows + ((7, 9, 'z'),)
        left = Table(extended_rows, self.left_column_names, self.column_types)

        joined = left.join(self.right, full_outer=True)

        # The join must hand back a new table, not either of its inputs.
        for source in (left, self.right):
            self.assertIsNot(joined, source)

        expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
        expected_types = [Number, Number, Text, Number, Number, Text]
        expected_rows = [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (7, 9, 'z', None, None, None)
        ]

        self.assertColumnNames(joined, expected_names)
        self.assertColumnTypes(joined, expected_types)
        self.assertRows(joined, expected_rows)
示例#10
0
    def test_join_with_row_names(self):
        """Row names set on the left table are expected on the joined table.

        The left table is built with ``row_names='three'``; after joining
        'one' against 'four' the row named 'a' must be retrievable by name
        and the row names must be ('a', 'b', 'c').
        """
        named_left = Table(
            self.left_rows,
            self.left_columns,
            row_names='three'
        )

        joined = named_left.join(self.right, 'one', 'four')

        # Look the first row up by its name rather than by position.
        row_a = joined.rows['a']
        self.assertSequenceEqual(row_a, (1, 4, 'a', 4, 'a'))

        expected_row_names = ('a', 'b', 'c')
        self.assertSequenceEqual(joined.row_names, expected_row_names)
示例#11
0
class TestTableJoin(unittest.TestCase):
    """Tests for ``Table.join`` covering default and ``inner=True`` joins.

    Both fixture tables hold the same three rows under different column
    names ('one'/'two'/'three' on the left, 'four'/'five'/'six' on the
    right). Joining 'one' to 'four' therefore pairs every row, while
    joining 'one' to 'five' pairs only the left row whose key is 2.
    """

    def setUp(self):
        """Build the shared left and right tables used by most tests."""
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_columns = (
            ('one', self.number_type),
            ('two', self.number_type),
            ('three', self.text_type)
        )

        self.right_columns = (
            ('four', self.number_type),
            ('five', self.number_type),
            ('six', self.text_type)
        )

        self.left = Table(self.left_rows, self.left_columns)
        self.right = Table(self.right_rows, self.right_columns)

    def _assert_shape(self, table, row_count, expected_columns):
        """Check row count, column count, column names, then column types.

        ``expected_columns`` is a sequence of ``(name, data_type_class)``
        pairs in column order. All names are checked before any type so
        the assertion order matches the hand-written checks this helper
        replaces.
        """
        self.assertEqual(len(table.rows), row_count)
        self.assertEqual(len(table.columns), len(expected_columns))

        for index, (name, _) in enumerate(expected_columns):
            self.assertEqual(table.columns[index].name, name)

        for index, (_, data_type) in enumerate(expected_columns):
            self.assertIsInstance(table.columns[index].data_type, data_type)

    def _assert_rows(self, table, expected_rows):
        """Check that the leading rows of ``table`` equal ``expected_rows``."""
        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(table.rows[index], expected)

    def test_join(self):
        """Join 'one' to 'four': every row pairs up and 'four' is dropped."""
        new_table = self.left.join(self.right, 'one', 'four')

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('five', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_match_multiple(self):
        """A left row is emitted once per right row that shares its key.

        Key 1 appears once in the right 'five' column and key 2 twice, so
        three rows are expected.
        """
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b')
        )

        right_rows = (
            (1, 1, 'a'),
            (1, 2, 'a'),
            (2, 2, 'b')
        )

        left = Table(left_rows, self.left_columns)
        right = Table(right_rows, self.right_columns)
        new_table = left.join(right, 'one', 'five')

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ])

    def test_join2(self):
        """Join 'one' to 'five': unmatched left rows are kept with ``None``.

        Only left key 2 occurs in 'five'; the other two left rows are
        expected with ``None`` in both right-hand columns.
        """
        new_table = self.left.join(self.right, 'one', 'five')

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_same_column_name(self):
        """Pass only a left key when the right table has a same-named column.

        The right table's middle column is renamed to 'one'; the expected
        result matches ``test_join2``, i.e. that column acts as the right
        key and does not appear in the output.
        """
        right_columns = (
            ('four', self.number_type),
            ('one', self.number_type),
            ('six', self.text_type)
        )

        right = Table(self.right_rows, right_columns)

        new_table = self.left.join(right, 'one')

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_func(self):
        """Join on keys computed by callables; all six columns are kept."""
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six'])
        )

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('five', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_column_does_not_exist(self):
        """Joining on a right key that is not a column raises ``KeyError``."""
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        """Inner join 'one' to 'four': all three rows are expected."""
        new_table = self.left.join(self.right, 'one', 'four', inner=True)

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('five', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_inner_join2(self):
        """Inner join 'one' to 'five': only the row with key 2 is expected."""
        new_table = self.left.join(self.right, 'one', 'five', inner=True)

        self._assert_shape(new_table, 1, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_same_column_name(self):
        """Inner join with only a left key and a same-named right column.

        The expected result matches ``test_inner_join2``.
        """
        right_columns = (
            ('four', self.number_type),
            ('one', self.number_type),
            ('six', self.text_type)
        )

        right = Table(self.right_rows, right_columns)

        new_table = self.left.join(right, 'one', inner=True)

        self._assert_shape(new_table, 1, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('six', Text)
        ])
        self._assert_rows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_func(self):
        """Inner join on callable keys; only the table shape is checked."""
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six']),
            inner=True
        )

        self._assert_shape(new_table, 3, [
            ('one', Number),
            ('two', Number),
            ('three', Text),
            ('four', Number),
            ('five', Number),
            ('six', Text)
        ])

    def test_join_with_row_names(self):
        """Row names set on the left table are expected on the joined table."""
        left = Table(self.left_rows, self.left_columns, row_names='three')
        new_table = left.join(self.right, 'one', 'four')

        self.assertSequenceEqual(new_table.rows['a'], (1, 4, 'a', 4, 'a'))
        self.assertSequenceEqual(new_table.row_names, ('a', 'b', 'c'))
示例#12
0
文件: test_join.py 项目: 01-/agate
class TestJoin(AgateTestCase):
    """Tests for ``Table.join``: default, inner, multi-column and kwargs.

    Both fixture tables hold the same three rows under different column
    names ('one'/'two'/'three' on the left, 'four'/'five'/'six' on the
    right). Joining 'one' to 'four' therefore pairs every row, while
    joining 'one' to 'five' pairs only the left row whose key is 2.
    """

    def setUp(self):
        """Build the shared left and right tables used by most tests."""
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def test_join(self):
        """Join 'one' to 'four': every row pairs up and 'four' is dropped."""
        new_table = self.left.join(self.right, 'one', 'four')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_match_multiple(self):
        """A left row is emitted once per right row that shares its key.

        Key 1 appears once in the right 'five' column and key 2 twice, so
        three rows are expected.
        """
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b')
        )

        right_rows = (
            (1, 1, 'a'),
            (1, 2, 'a'),
            (2, 2, 'b')
        )

        left = Table(left_rows, self.left_column_names, self.column_types)
        right = Table(right_rows, self.right_column_names, self.column_types)
        new_table = left.join(right, 'one', 'five')

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ])

    def test_join2(self):
        """Join 'one' to 'five': unmatched left rows are kept with ``None``.

        Only left key 2 occurs in 'five'; the other two left rows are
        expected with ``None`` in both right-hand columns.
        """
        new_table = self.left.join(self.right, 'one', 'five')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_same_column_name(self):
        """Pass only a left key when the right table has a same-named column.

        The right table's middle column is renamed to 'one'; the expected
        result matches ``test_join2``, i.e. that column acts as the right
        key and does not appear in the output.
        """
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_multiple_columns(self):
        """Join on column pairs; both right key columns are dropped."""
        new_table = self.left.join(
            self.right,
            ['two', 'three'],
            ['five', 'six']
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number])
        self.assertRows(new_table, [
            (1, 4, 'a', 1),
            (2, 3, 'b', 2),
            (None, 2, 'c', None)
        ])

    def test_join_func(self):
        """Join on keys computed by callables; all six columns are kept."""
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six'])
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_column_does_not_exist(self):
        """Joining on a right key that is not a column raises ``KeyError``."""
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        """Inner join 'one' to 'four': all three rows are expected."""
        new_table = self.left.join(self.right, 'one', 'four', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_inner_join2(self):
        """Inner join 'one' to 'five': the row with key 2 is expected."""
        new_table = self.left.join(self.right, 'one', 'five', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_same_column_name(self):
        """Inner join with only a left key and a same-named right column.

        The expected result matches ``test_inner_join2``.
        """
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_func(self):
        """Inner join on keys computed by callables."""
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six']),
            inner=True
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        # NOTE(review): the computed keys '4a', '3b' and '2c' occur on both
        # sides, yet only one row is listed here — confirm whether
        # assertRows ignores rows beyond those given.
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a')
        ])

    def test_join_with_row_names(self):
        """Row names set on the left table are expected on the joined table."""
        left = Table(self.left_rows, self.left_column_names, self.column_types, row_names='three')
        new_table = left.join(self.right, 'one', 'four')

        self.assertRowNames(new_table, ('a', 'b', 'c'))

    def test_join_require_match(self):
        """``require_match=True`` raises ``ValueError`` on an unmatched row."""
        # 'five' holds 4, 3 and 2, so left keys 1 and None have no partner.
        with self.assertRaises(ValueError):
            self.left.join(self.right, 'one', 'five', require_match=True)

        # 'one' and 'four' hold the same values, so this call must not raise.
        self.left.join(self.right, 'one', 'four', require_match=True)

    def test_join_columns_kwarg(self):
        """``columns`` limits which right-hand columns are appended."""
        new_table = self.left.join(self.right, 'one', 'four', columns=['six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 'a'),
            (2, 3, 'b', 'b'),
            (None, 2, 'c', 'c')
        ])

    def test_join_columns_kwarg_right_key(self):
        """Listing the right key in ``columns`` keeps it in the result."""
        new_table = self.left.join(self.right, 'one', 'four', columns=['four', 'six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 2, 'b'),
            (None, 2, 'c', None, 'c')
        ])
示例#13
0
class TestJoin(AgateTestCase):
    """
    Tests for ``Table.join``.

    The fixture is two three-row tables holding the same data under different
    column names (``one/two/three`` on the left, ``four/five/six`` on the
    right). Individual tests build their own tables when they need other data.
    """

    def setUp(self):
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def test_join(self):
        # Join on 'one' == 'four'; the right key column is not repeated in the
        # result, so only 'five' and 'six' are appended.
        new_table = self.left.join(self.right, 'one', 'four')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_column_indicies(self):
        # Same join as test_join, with both keys given as column indices.
        new_table = self.left.join(self.right, 0, 0)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_join_match_multiple(self):
        # The left row with one == 2 matches two right rows (five == 2), so it
        # appears twice in the result.
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b')
        )

        right_rows = (
            (1, 1, 'a'),
            (1, 2, 'a'),
            (2, 2, 'b')
        )

        left = Table(left_rows, self.left_column_names, self.column_types)
        right = Table(right_rows, self.right_column_names, self.column_types)
        new_table = left.join(right, 'one', 'five')

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 1, 'a'),
            (2, 3, 'b', 2, 'b')
        ])

    def test_join2(self):
        # Join on 'one' == 'five': only one == 2 finds a partner. Unmatched
        # left rows are kept, with None in the right-hand columns.
        new_table = self.left.join(self.right, 'one', 'five')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_same_column_name(self):
        # Only the left key is passed; the right table has a column of the
        # same name ('one') and the expected output matches test_join2.
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one')

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', None, None),
            (2, 3, 'b', None, 'c'),
            (None, 2, 'c', None, None)
        ])

    def test_join_multiple_columns(self):
        # Keys given as lists of column names; both right key columns are
        # left out of the result, so only 'four' is appended.
        new_table = self.left.join(
            self.right,
            ['two', 'three'],
            ['five', 'six']
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number])
        self.assertRows(new_table, [
            (1, 4, 'a', 1),
            (2, 3, 'b', 2),
            (None, 2, 'c', None)
        ])

    def test_join_func(self):
        # Keys given as row functions; all three right columns are expected in
        # the result.
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six'])
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_column_does_not_exist(self):
        # 'seven' is not a column of the right table.
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        # inner=True on fully matching keys: same expected output as test_join.
        new_table = self.left.join(self.right, 'one', 'four', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 4, 'a'),
            (2, 3, 'b', 3, 'b'),
            (None, 2, 'c', 2, 'c')
        ])

    def test_inner_join2(self):
        # inner=True on 'one' == 'five': the matching row (one == 2) is
        # expected first in the result.
        new_table = self.left.join(self.right, 'one', 'five', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_same_column_name(self):
        # Inner join with a single shared key name ('one' on both tables).
        right_column_names = ['four', 'one', 'six']

        right = Table(self.right_rows, right_column_names, self.column_types)

        new_table = self.left.join(right, 'one', inner=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (2, 3, 'b', None, 'c')
        ])

    def test_inner_join_func(self):
        # Inner join using row functions as keys.
        new_table = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six']),
            inner=True
        )

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a')
        ])

    def test_join_with_row_names(self):
        # Row names defined on the left table should carry over to the result.
        left = Table(self.left_rows, self.left_column_names, self.column_types, row_names='three')
        new_table = left.join(self.right, 'one', 'four')

        self.assertRowNames(new_table, ('a', 'b', 'c'))

    def test_join_require_match(self):
        # 'one' vs 'five' leaves left rows without a partner (see test_join2),
        # which require_match=True must report as a ValueError. The original
        # repeated this identical check twice; one copy is enough.
        with self.assertRaises(ValueError):
            self.left.join(self.right, 'one', 'five', require_match=True)

        # 'one' vs 'four' pairs up every row (see test_join), so this call must
        # not raise. The result itself is not inspected.
        self.left.join(self.right, 'one', 'four', require_match=True)

    def test_join_columns_kwarg(self):
        # `columns` restricts which right-hand columns are appended.
        new_table = self.left.join(self.right, 'one', 'four', columns=['six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 'a'),
            (2, 3, 'b', 'b'),
            (None, 2, 'c', 'c')
        ])

    def test_join_columns_kwarg_right_key(self):
        # Listing the right key in `columns` keeps it in the output.
        new_table = self.left.join(self.right, 'one', 'four', columns=['four', 'six'])

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 'a'),
            (2, 3, 'b', 2, 'b'),
            (None, 2, 'c', None, 'c')
        ])

    def test_join_rows_are_tuples(self):
        # The values of a joined row must be exposed as a tuple.
        new_table = self.left.join(self.right, 'one', 'four', columns=['four', 'six'])

        self.assertIsInstance(new_table.rows[0].values(), tuple)

    def test_full_outer(self):
        # full_outer=True keeps unmatched rows from both sides, padding the
        # missing half with None; the right key column is kept as well.
        left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (3, 2, 'c')
        )

        right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (4, 2, 'c')
        )

        left = Table(left_rows, self.left_column_names, self.column_types)
        right = Table(right_rows, self.right_column_names, self.column_types)

        new_table = left.join(right, 'one', 'four', full_outer=True)

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (3, 2, 'c', None, None, None),
            (None, None, None, 4, 2, 'c')
        ])

    def test_join_by_row_number(self):
        # No keys given: the expected output pairs rows by position.
        new_table = self.left.join(self.right, full_outer=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_join_by_row_number_short_right(self):
        # NOTE(review): despite the name, the RIGHT table is the longer one
        # here; its extra row is expected with None in the left-hand columns.
        right_rows = self.right_rows + ((7, 9, 'z'),)
        right = Table(right_rows, self.right_column_names, self.column_types)

        new_table = self.left.join(right, full_outer=True)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (None, None, None, 7, 9, 'z')
        ])

    def test_join_by_row_number_short_left(self):
        # NOTE(review): despite the name, the LEFT table is the longer one
        # here; its extra row is expected with None in the right-hand columns.
        left_rows = self.left_rows + ((7, 9, 'z'),)
        left = Table(left_rows, self.left_column_names, self.column_types)

        new_table = left.join(self.right, full_outer=True)

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (7, 9, 'z', None, None, None)
        ])
示例#14
0
    def test_join_with_row_names(self):
        # Rows of the joined table must stay addressable by the row names
        # taken from the left table's 'three' column.
        named_left = Table(
            self.left_rows,
            self.left_column_names,
            self.column_types,
            row_names='three'
        )
        joined = named_left.join(self.right, 'one', 'four')

        row_a = joined.rows['a']
        self.assertSequenceEqual(row_a, (1, 4, 'a', 4, 'a'))
        self.assertSequenceEqual(joined.row_names, ('a', 'b', 'c'))
示例#15
0
文件: test_join.py 项目: 01-/agate
    def test_join_with_row_names(self):
        # The left table is keyed by its 'three' column; the join result is
        # expected to expose the same row names.
        keyed_left = Table(
            self.left_rows,
            self.left_column_names,
            self.column_types,
            row_names='three'
        )
        result = keyed_left.join(self.right, 'one', 'four')

        self.assertRowNames(result, ('a', 'b', 'c'))
示例#16
0
class TestTableJoin(unittest.TestCase):
    """
    Tests for ``Table.join`` written against plain ``unittest`` assertions.

    Both fixture tables hold the same three rows; only the column names
    differ (``one/two/three`` versus ``four/five/six``).
    """

    def setUp(self):
        self.number_type = Number()
        self.text_type = Text()
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']

        self.left_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))
        self.right_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def _check_join(self, table, row_count, names, types, rows=()):
        # Shared assertion sequence: size first, then each column's name, then
        # each column's data type, then the leading rows listed in `rows`.
        self.assertEqual(len(table.rows), row_count)
        self.assertEqual(len(table.columns), len(names))

        for position, name in enumerate(names):
            self.assertEqual(table.columns[position].name, name)

        for position, type_class in enumerate(types):
            self.assertIsInstance(table.columns[position].data_type, type_class)

        for position, expected in enumerate(rows):
            self.assertSequenceEqual(table.rows[position], expected)

    def test_join(self):
        joined = self.left.join(self.right, 'one', 'four')

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'five', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', 4, 'a'),
                (2, 3, 'b', 3, 'b'),
                (None, 2, 'c', 2, 'c')
            ]
        )

    def test_join_match_multiple(self):
        # One left row matches two right rows and is therefore repeated.
        left = Table(
            ((1, 4, 'a'), (2, 3, 'b')),
            self.left_column_names,
            self.column_types
        )
        right = Table(
            ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b')),
            self.right_column_names,
            self.column_types
        )
        joined = left.join(right, 'one', 'five')

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', 1, 'a'),
                (2, 3, 'b', 1, 'a'),
                (2, 3, 'b', 2, 'b')
            ]
        )

    def test_join2(self):
        # Unmatched left rows are kept with None in the right-hand columns.
        joined = self.left.join(self.right, 'one', 'five')

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', None, None),
                (2, 3, 'b', None, 'c'),
                (None, 2, 'c', None, None)
            ]
        )

    def test_join_same_column_name(self):
        # Only the left key is passed; the right table also has a 'one' column.
        right = Table(self.right_rows, ['four', 'one', 'six'], self.column_types)

        joined = self.left.join(right, 'one')

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', None, None),
                (2, 3, 'b', None, 'c'),
                (None, 2, 'c', None, None)
            ]
        )

    def test_join_func(self):
        # Keys supplied as row functions; all right columns are retained.
        joined = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six'])
        )

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'five', 'six'],
            [Number, Number, Text, Number, Number, Text],
            [
                (1, 4, 'a', 1, 4, 'a'),
                (2, 3, 'b', 2, 3, 'b'),
                (None, 2, 'c', None, 2, 'c')
            ]
        )

    def test_join_column_does_not_exist(self):
        # 'seven' is not a column of the right table.
        with self.assertRaises(KeyError):
            self.left.join(self.right, 'one', 'seven')

    def test_inner_join(self):
        joined = self.left.join(self.right, 'one', 'four', inner=True)

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'five', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (1, 4, 'a', 4, 'a'),
                (2, 3, 'b', 3, 'b'),
                (None, 2, 'c', 2, 'c')
            ]
        )

    def test_inner_join2(self):
        # Only the single matching row survives an inner join on these keys.
        joined = self.left.join(self.right, 'one', 'five', inner=True)

        self._check_join(
            joined,
            1,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (2, 3, 'b', None, 'c')
            ]
        )

    def test_inner_join_same_column_name(self):
        right = Table(self.right_rows, ['four', 'one', 'six'], self.column_types)

        joined = self.left.join(right, 'one', inner=True)

        self._check_join(
            joined,
            1,
            ['one', 'two', 'three', 'four', 'six'],
            [Number, Number, Text, Number, Text],
            [
                (2, 3, 'b', None, 'c')
            ]
        )

    def test_inner_join_func(self):
        # Row contents are not checked here, only the shape and the columns.
        joined = self.left.join(
            self.right,
            lambda left: '%i%s' % (left['two'], left['three']),
            lambda right: '%i%s' % (right['five'], right['six']),
            inner=True
        )

        self._check_join(
            joined,
            3,
            ['one', 'two', 'three', 'four', 'five', 'six'],
            [Number, Number, Text, Number, Number, Text]
        )

    def test_join_with_row_names(self):
        # Row names from the left table remain usable on the joined table.
        named_left = Table(
            self.left_rows,
            self.left_column_names,
            self.column_types,
            row_names='three'
        )
        joined = named_left.join(self.right, 'one', 'four')

        self.assertSequenceEqual(joined.rows['a'], (1, 4, 'a', 4, 'a'))
        self.assertSequenceEqual(joined.row_names, ('a', 'b', 'c'))
示例#17
0
class VoteWatcher(object):
    """
    Compare MPs' election-machine answers with how they voted in parliament.

    Constructing an instance does all the work: it loads the promises, fetches
    the votes and builds ``table_all`` and ``table_diff``.

    Example usage:
    # Init vote watcher
    x = VoteWatcher(128,"https://www.eduskunta.fi/FI/vaski/Sivut/Aanestys.aspx?aanestysnro=19&istuntonro=82&vuosi=2015")

    # Write a table with all MPs
    x.table_all.to_csv("table.csv")

    # Write a table with only the people that changed opinions
    x.table_diff.to_csv("table_change.csv")
    """

    def __init__(self, question_id, vote_url, reverse=False):
        """
        - question_id : an id of a question as defined in questions.py
        - vote_url    : a url to the voting record on eduskunta.fi
            ie. https://www.eduskunta.fi/FI/vaski/Sivut/Aanestys.aspx?aanestysnro=19&istuntonro=82&vuosi=2015
        - reverse     : Set to true if "agree" in the election machine equals
            "no" in the parliament vote. Stored as ``self.reverse``.

        Raises ValueError if ``question_id`` is not a key of QUESTIONS.
        """
        self.vote_url = vote_url
        self.reverse = reverse
        try:
            self.question = QUESTIONS[question_id]
        except KeyError:
            raise ValueError("No such question id")

        # Order matters: the comparison needs both tables to exist.
        self._get_promise_table()
        self._get_vote_table()
        self._compare_answers()

    def _get_promise_table(self):
        """ Get the answers from the voting advice application.

        Reads "yle-vaalikone-2015.csv", keeps the rows where "valittu" == 1
        and stores name, party and the selected question's answer (renamed to
        "promise") in ``self.promise_table``.
        """
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print("Get election machine data (YLE)")
        table = Table.from_csv("yle-vaalikone-2015.csv")\
            .where(lambda row: row["valittu"] == 1)\
            .select(["nimi", "puolue", self.question])
        self.promise_table = rename_column(table, self.question, "promise")

    def _get_vote_table(self):
        """ Get the votes of every MP.

        Downloads ``self.vote_url``, parses the result table and stores one
        (name, party, vote) row per table row in ``self.vote_table``.
        """
        print("Get votes from eduskunta.fi")
        resp = requests.get(self.vote_url)
        soup = BeautifulSoup(resp.text, "html.parser")

        # TODO: Make a better selector. This takes the table inside the second
        # <li class="expand"> element of the page.
        _rows = []
        trs = soup.find_all("li", {"class": "expand"})[1].find("table").find_all("tr")
        for row in trs:
            cells = row.find_all("td")
            # The first cell holds "name / party"; the second holds the vote.
            name_cell = cells[0].text.split("/")
            name = name_cell[0].strip()
            party = name_cell[1].strip()
            vote = cells[1].text
            _rows.append([name, party, vote])

        self.vote_table = Table(_rows, column_names=["name", "party", "vote"])

    def _compare_answers(self):
        """ Check if the answers in the voting machine correspond with the
            vote.

        Joins votes with promises on the MP's name, adds a "comparison"
        column, and keeps the rows marked "different opinion" in
        ``self.table_diff``.
        """
        print("Compare promises and votes")
        table = self.vote_table.join(self.promise_table, "name", right_key="nimi")
        self.table_all = table.compute([
            ("comparison", AnswerComparison())
        ])
        self.table_diff = self.table_all\
            .where(lambda row: row['comparison'] == "different opinion")

        print("Found %s MP's that didn't vote according to their promises." % len(self.table_diff.rows))