Exemplo n.º 1
0
    def setUp(self):

        self.pg = Postgres(username='******',
                           password='******',
                           host='test',
                           db='test',
                           port=123)

        self.tbl = Table([['ID', 'Name'], [1, 'Jim'], [2, 'John'],
                          [3, 'Sarah']])

        self.tbl2 = Table([
            ["c1", "c2", "c3", "c4", "c5", "c6", "c7"],
            ["a", "", 1, "NA", 1.4, 1, 2],
            ["b", "", 2, "NA", 1.4, 1, 2],
            ["c", "", 3.4, "NA", "", "", "a"],
            ["d", "", 5, "NA", 1.4, 1, 2],
            ["e", "", 6, "NA", 1.4, 1, 2],
            ["f", "", 7.8, "NA", 1.4, 1, 2],
            ["g", "", 9, "NA", 1.4, 1, 2],
        ])

        self.mapping = self.pg.generate_data_types(self.tbl)
        self.mapping2 = self.pg.generate_data_types(self.tbl2)
        self.pg.DO_PARSE_BOOLS = True
        self.mapping3 = self.pg.generate_data_types(self.tbl2)
Exemplo n.º 2
0
    def setUp(self):

        self.pg = Postgres(username='******',
                           password='******',
                           host='test',
                           db='test',
                           port=123)

        self.tbl = Table([['ID', 'Name'], [1, 'Jim'], [2, 'John'],
                          [3, 'Sarah']])

        self.mapping = self.pg.generate_data_types(self.tbl)
Exemplo n.º 3
0
    def test_connection(self):

        # Test connection with kwargs passed
        Postgres(username='******', password='******', host='test', db='test')

        # Test connection with env variables
        os.environ['PGUSER'] = '******'
        os.environ['PGPASSWORD'] = '******'
        os.environ['PGHOST'] = 'host_env'
        os.environ['PGDATABASE'] = 'db_env'
        os.environ['PGPORT'] = '5432'
        pg_env = Postgres()

        self.assertEqual(pg_env.username, 'user_env')
        self.assertEqual(pg_env.password, 'pass_env')
        self.assertEqual(pg_env.host, 'host_env')
        self.assertEqual(pg_env.db, 'db_env')
        self.assertEqual(pg_env.port, 5432)
Exemplo n.º 4
0
    def setUp(self):

        self.temp_schema = TEMP_SCHEMA
        self.db = Postgres()

        # Create a schema.
        setup_sql = f"""
                     DROP SCHEMA IF EXISTS {self.temp_schema} CASCADE;
                     CREATE SCHEMA {self.temp_schema};
                     """
        self.db.query(setup_sql)

        # Load dummy data to parsons tables
        self.table1 = Table.from_csv(f'{_dir}/test_data/sample_table_1.csv')
        self.table2 = Table.from_csv(f'{_dir}/test_data/sample_table_2.csv')

        # Create source table
        self.db.copy(self.table1, f'{self.temp_schema}.source')

        # Create DB Sync object
        self.db_sync = DBSync(self.db, self.db)
Exemplo n.º 5
0
    def setUp(self):

        self.temp_schema = TEMP_SCHEMA
        self.pg = Postgres()

        self.tbl = Table([['ID', 'Name'], [1, 'Jim'], [2, 'John'],
                          [3, 'Sarah']])

        # Create a schema, create a table, create a view
        setup_sql = f"""
                    drop schema if exists {self.temp_schema} cascade;
                    create schema {self.temp_schema};
                    """

        other_sql = f"""
                    create table {self.temp_schema}.test (id smallint,name varchar(5));
                    create view {self.temp_schema}.test_view as (select * from {self.temp_schema}.test);
                    """  # noqa: E501

        self.pg.query(setup_sql)

        self.pg.query(other_sql)
Exemplo n.º 6
0
class TestPostgresDBSync(unittest.TestCase):
    def setUp(self):

        self.temp_schema = TEMP_SCHEMA
        self.db = Postgres()

        # Create a schema.
        setup_sql = f"""
                     DROP SCHEMA IF EXISTS {self.temp_schema} CASCADE;
                     CREATE SCHEMA {self.temp_schema};
                     """
        self.db.query(setup_sql)

        # Load dummy data to parsons tables
        self.table1 = Table.from_csv(f'{_dir}/test_data/sample_table_1.csv')
        self.table2 = Table.from_csv(f'{_dir}/test_data/sample_table_2.csv')

        # Create source table
        self.db.copy(self.table1, f'{self.temp_schema}.source')

        # Create DB Sync object
        self.db_sync = DBSync(self.db, self.db)

    def tearDown(self):
        # Drop the view, the table and the schema

        teardown_sql = f"""
                       DROP SCHEMA IF EXISTS {self.temp_schema} CASCADE;
                       """
        self.db.query(teardown_sql)

    def test_table_sync_full_drop(self):
        # Test a db sync with drop.

        self.db_sync.table_sync_full(f'{self.temp_schema}.source',
                                     f'{self.temp_schema}.destination')

        source = self.db.query(f"SELECT * FROM {self.temp_schema}.source")
        destination = self.db.query(
            f"SELECT * FROM {self.temp_schema}.destination")
        assert_matching_tables(source, destination)

    def test_table_sync_full_truncate(self):
        # Test a db sync with truncate.

        self.db_sync.table_sync_full(f'{self.temp_schema}.source',
                                     f'{self.temp_schema}.destination',
                                     if_exists='truncate')
        source = self.db.query(f"SELECT * FROM {self.temp_schema}.source")
        destination = self.db.query(
            f"SELECT * FROM {self.temp_schema}.destination")
        assert_matching_tables(source, destination)

    def test_table_sync_full_empty_table(self):
        # Test a full sync of a table when the source table is empty.

        # Empty the source table
        self.db.query(f"TRUNCATE {self.temp_schema}.source")

        # Attempt to sync.
        self.db_sync.table_sync_full(f'{self.temp_schema}.source',
                                     f'{self.temp_schema}.destination')

    def test_table_sync_full_chunk(self):
        # Test chunking in full sync.

        self.db_sync.chunk_size = 10
        self.db_sync.table_sync_full(f'{self.temp_schema}.source',
                                     f'{self.temp_schema}.destination')

        source = self.db.query(f"SELECT * FROM {self.temp_schema}.source")
        destination = self.db.query(
            f"SELECT * FROM {self.temp_schema}.destination")
        assert_matching_tables(source, destination)

    def test_table_sync_incremental(self):
        # Test that incremental sync

        self.db.copy(self.table1, f'{self.temp_schema}.destination')
        self.db.copy(self.table2,
                     f'{self.temp_schema}.source',
                     if_exists='append')
        self.db_sync.table_sync_incremental(f'{self.temp_schema}.source',
                                            f'{self.temp_schema}.destination',
                                            'pk')

        count1 = self.db.query(f"SELECT * FROM {self.temp_schema}.source")
        count2 = self.db.query(f"SELECT * FROM {self.temp_schema}.destination")
        assert_matching_tables(count1, count2)

    def test_table_sync_incremental_chunk(self):
        # Test chunking of incremental sync.

        self.db_sync.chunk_size = 10
        self.db.copy(self.table1, f'{self.temp_schema}.destination')
        self.db.copy(self.table2,
                     f'{self.temp_schema}.source',
                     if_exists='append')
        self.db_sync.table_sync_incremental(f'{self.temp_schema}.source',
                                            f'{self.temp_schema}.destination',
                                            'pk')

        count1 = self.db.query(f"SELECT * FROM {self.temp_schema}.source")
        count2 = self.db.query(f"SELECT * FROM {self.temp_schema}.destination")
        assert_matching_tables(count1, count2)

    def test_table_sync_incremental_create_destination_table(self):
        # Test that an incremental sync works if the destination table does not exist.

        self.db_sync.table_sync_incremental(f'{self.temp_schema}.source',
                                            f'{self.temp_schema}.destination',
                                            'pk')

        count1 = self.db.query(f"SELECT * FROM {self.temp_schema}.source")
        count2 = self.db.query(f"SELECT * FROM {self.temp_schema}.destination")
        assert_matching_tables(count1, count2)

    def test_table_sync_incremental_empty_table(self):
        # Test an incremental sync of a table when the source table is empty.

        # Empty the source table
        self.db.query(f"TRUNCATE {self.temp_schema}.source")

        # Attempt to sync.
        self.db_sync.table_sync_incremental(f'{self.temp_schema}.source',
                                            f'{self.temp_schema}.destination',
                                            'pk')
Exemplo n.º 7
0
class TestPostgresCreateStatement(unittest.TestCase):
    def setUp(self):

        self.pg = Postgres(username='******',
                           password='******',
                           host='test',
                           db='test',
                           port=123)

        self.tbl = Table([['ID', 'Name'], [1, 'Jim'], [2, 'John'],
                          [3, 'Sarah']])

        self.mapping = self.pg.generate_data_types(self.tbl)

    def test_connection(self):

        # Test connection with kwargs passed
        arg_pg = Postgres(username='******',
                          password='******',
                          host='test',
                          db='test')

        # Test connection with env variables
        os.environ['PGUSER'] = '******'
        os.environ['PGPASSWORD'] = '******'
        os.environ['PGHOST'] = 'host_env'
        os.environ['PGDATABASE'] = 'db_env'
        os.environ['PGPORT'] = '5432'
        pg_env = Postgres()

        self.assertEqual(pg_env.username, 'user_env')
        self.assertEqual(pg_env.password, 'pass_env')
        self.assertEqual(pg_env.host, 'host_env')
        self.assertEqual(pg_env.db, 'db_env')
        self.assertEqual(pg_env.port, 5432)

    def test_data_type(self):

        # Test smallint
        self.assertEqual(self.pg.data_type(1, ''), 'smallint')
        # Test int
        self.assertEqual(self.pg.data_type(32769, ''), 'int')
        # Test bigint
        self.assertEqual(self.pg.data_type(2147483648, ''), 'bigint')
        # Test varchar that looks like an int
        self.assertEqual(self.pg.data_type('00001', ''), 'varchar')
        # Test a float as a decimal
        self.assertEqual(self.pg.data_type(5.001, ''), 'decimal')
        # Test varchar
        self.assertEqual(self.pg.data_type('word', ''), 'varchar')
        # Test int with underscore
        self.assertEqual(self.pg.data_type('1_2', ''), 'varchar')
        # Test int with leading zero
        self.assertEqual(self.pg.data_type('01', ''), 'varchar')

    def test_generate_data_types(self):

        # Test correct header labels
        self.assertEqual(self.mapping['headers'], ['ID', 'Name'])
        # Test correct data types
        self.assertEqual(self.mapping['type_list'], ['smallint', 'varchar'])
        # Test correct lengths
        self.assertEqual(self.mapping['longest'], [1, 5])

    def test_vc_padding(self):

        # Test padding calculated correctly
        self.assertEqual(self.pg.vc_padding(self.mapping, .2), [1, 6])

    def test_vc_max(self):

        # Test max sets it to the max
        self.assertEqual(self.pg.vc_max(self.mapping, ['Name']), [1, 65535])

        # Test raises when can't find column
        # To Do

    def test_vc_validate(self):

        # Test that a column with a width of 0 is set to 1
        self.mapping['longest'][0] = 0
        self.mapping = self.pg.vc_validate(self.mapping)
        self.assertEqual(self.mapping, [1, 5])

    def test_create_sql(self):

        # Test the the statement is expected
        sql = self.pg.create_sql('tmc.test', self.mapping, distkey='ID')
        exp_sql = "create table tmc.test (\n  id smallint,\n  name varchar(5)) \ndistkey(ID) ;"
        self.assertEqual(sql, exp_sql)

    def test_column_validate(self):

        bad_cols = [
            '', 'SELECT',
            'asdfjkasjdfklasjdfklajskdfljaskldfjaklsdfjlaksdfjklasjdfklasjdkfljaskldfljkasjdkfasjlkdfjklasdfjklakjsfasjkdfljaslkdfjklasdfjklasjkldfakljsdfjalsdkfjklasjdfklasjdfklasdkljf'
        ]  # noqa: E501
        fixed_cols = [
            'col_0', 'col_1',
            'asdfjkasjdfklasjdfklajskdfljaskldfjaklsdfjlaksdfjklasjdfklasjdkfljaskldfljkasjdkfasjlkdfjklasdfjklakjsfasjkdfljaslkdfjkl'
        ]  # noqa: E501
        self.assertEqual(self.pg.column_name_validate(bad_cols), fixed_cols)

    def test_create_statement(self):

        # Assert that copy statement is expected
        sql = self.pg.create_statement(self.tbl, 'tmc.test', distkey='ID')
        exp_sql = """create table tmc.test (\n  "id" smallint,\n  "name" varchar(5)) \ndistkey(ID) ;"""  # noqa: E501
        self.assertEqual(sql, exp_sql)

        # Assert that an error is raised by an empty table
        empty_table = Table([['Col_1', 'Col_2']])
        self.assertRaises(ValueError, self.pg.create_statement, empty_table,
                          'tmc.test')
Exemplo n.º 8
0
class TestPostgresDB(unittest.TestCase):
    def setUp(self):

        self.temp_schema = TEMP_SCHEMA

        self.pg = Postgres()

        self.tbl = Table([['ID', 'Name'], [1, 'Jim'], [2, 'John'],
                          [3, 'Sarah']])

        # Create a schema, create a table, create a view
        setup_sql = f"""
                    drop schema if exists {self.temp_schema} cascade;
                    create schema {self.temp_schema};
                    """

        other_sql = f"""
                    create table {self.temp_schema}.test (id smallint,name varchar(5));
                    create view {self.temp_schema}.test_view as (select * from {self.temp_schema}.test);
                    """ # noqa: E501

        self.pg.query(setup_sql)

        self.pg.query(other_sql)

    def tearDown(self):

        # Drop the view, the table and the schema
        teardown_sql = f"""
                       drop schema if exists {self.temp_schema} cascade;
                       """
        self.pg.query(teardown_sql)

    def test_query(self):

        # Check that query sending back expected result
        r = self.pg.query('select 1')
        self.assertEqual(r[0]['?column?'], 1)

    def test_query_with_parameters(self):
        table_name = f"{self.temp_schema}.test"
        self.pg.copy(self.tbl, f"{self.temp_schema}.test", if_exists='append')

        sql = f"select * from {table_name} where name = %s"
        name = 'Sarah'
        r = self.pg.query(sql, parameters=[name])
        self.assertEqual(r[0]['name'], name)

        sql = f"select * from {table_name} where name in (%s, %s)"
        names = ['Sarah', 'John']
        r = self.pg.query(sql, parameters=names)
        self.assertEqual(r.num_rows, 2)

    def test_copy(self):

        # Copy a table and ensure table exists
        self.pg.copy(self.tbl,
                     f'{self.temp_schema}.test_copy',
                     if_exists='drop')
        r = self.pg.query(
            f"select * from {self.temp_schema}.test_copy where name='Jim'")
        self.assertEqual(r[0]['id'], 1)

        # Copy table and ensure truncate works.
        self.pg.copy(self.tbl,
                     f'{self.temp_schema}.test_copy',
                     if_exists='truncate')
        tbl = self.pg.query(
            f"select count(*) from {self.temp_schema}.test_copy")
        self.assertEqual(tbl.first, 3)

        # Copy table and ensure that drop works.
        self.pg.copy(self.tbl,
                     f'{self.temp_schema}.test_copy',
                     if_exists='drop')
        tbl = self.pg.query(
            f"select count(*) from {self.temp_schema}.test_copy")
        self.assertEqual(tbl.first, 3)

        # Copy table and ensure that append works.
        self.pg.copy(self.tbl,
                     f'{self.temp_schema}.test_copy',
                     if_exists='append')
        tbl = self.pg.query(
            f"select count(*) from {self.temp_schema}.test_copy")
        self.assertEqual(tbl.first, 6)

        # Try to copy the table and ensure that default fail works.
        self.assertRaises(ValueError, self.pg.copy, self.tbl,
                          f'{self.temp_schema}.test_copy')

        # Try to copy the table and ensure that explicit fail works.
        self.assertRaises(ValueError,
                          self.pg.copy,
                          self.tbl,
                          f'{self.temp_schema}.test_copy',
                          if_exists='fail')