def test_recover_from_state(self, mock_connect, mock_execute, _, __): """continues to read a table from the saved state""" tables = [ { 'value': 'public.test1' }, { 'value': 'public.test2' }, { 'value': 'public.test3' }, ] last_index = 1 self.source['state'] = {'last_index': last_index, 'max_value': 100} inst = Postgres(self.source, OPTIONS) inst.tables = tables cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.return_value = [{ 'id': 101 }, { 'id': 102 }, { 'id': 103 }] inst.read() first_query = mock_execute.call_args_list[0][0][0] self.assertTrue( "\"inckey\" >= 'incval' AND \"inckey\" <= '100'" in first_query) self.assertTrue('FROM "public"."test2"' in first_query)
def test_read_end_stream(self, m): '''reads the entire table from the database and validates that the stream returns None to indicate the end''' mock_recs = [{ 'id': 1, 'col1': 'foo1', 'col2': 'bar1' }, { 'id': 2, 'col1': 'foo2', 'col2': 'bar2' }, { 'id': 3, 'col1': 'foo3', 'col2': 'bar3' }] inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] result_order = [mock_recs, []] m.return_value.cursor.return_value.fetchall.side_effect = result_order rows = inst.read() self.assertEqual(len(rows), len(mock_recs)) empty = inst.read() self.assertEqual(empty, []) end = inst.read() self.assertEqual(end, None)
def test_read_retries(self, mock_connect): inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] mock_connect.side_effect = psycopg2.DatabaseError('TestRetiresError') with self.assertRaises(psycopg2.DatabaseError): inst.read() self.assertEqual(mock_connect.call_count, postgres.source.MAX_RETRIES)
def test_incremental(self, mock_connect): inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'schema.foo'}] inst.read() q = ('DECLARE cur CURSOR FOR ' 'SELECT * FROM "schema"."foo" WHERE inckey > \'incval\'') execute_mock = mock_connect.return_value.cursor.return_value.execute execute_mock.assert_has_calls([mock.call(q)], True)
def test_batch_size(self, mock_connect, mock_execute): customBatchSize = 42 self.source['__batchSize'] = customBatchSize inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.return_value = self.mock_recs inst.read() second_query = mock_execute.call_args_list[1][0][0] txt = 'FETCH FORWARD %s' % customBatchSize self.assertTrue(second_query.startswith(txt))
def test_no_state_for_empty_results(self, mock_connect, mock_state, _, __): """before returning a batch of data, the sources state should be reported as well as having the state ID appended to each data object""" inst = Postgres(self.source, OPTIONS) table_name = 'my_schema.foo_bar' inst.tables = [{'value': table_name}] result_order = [[], []] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.side_effect = result_order inst.read() # State function was called with relevant table name and row count mock_state.assert_not_called()
def test_read(self, m): '''reads a table from the database and validates that each row has a __tablename and __schemaname column''' mock_recs = [{ 'id': 1, 'col1': 'foo1', 'col2': 'bar1' }, { 'id': 2, 'col1': 'foo2', 'col2': 'bar2' }, { 'id': 3, 'col1': 'foo3', 'col2': 'bar3' }] inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] m.return_value.cursor.return_value.fetchall.return_value = mock_recs rows = inst.read() self.assertEqual(len(rows), len(mock_recs)) for x in range(0, len(rows)): self.assertEqual(rows[x]['__tablename'], 'foo_bar') self.assertEqual(rows[x]['__schemaname'], 'my_schema')
def test_recover_from_state(self, mock_connect, mock_execute): ''' continues to read a table from the saved state ''' table_offset = 100 self.source['state'] = {'my_schema.foo_bar': table_offset} inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.return_value = self.mock_recs inst.read() first_query = mock_execute.call_args_list[0][0][0] # The query should use `OFFSET` to skip already collected data self.assertTrue(first_query.endswith('OFFSET %s' % table_offset)) # Three records were returned so the loaded count should be OFFSET + 3 self.assertEqual(inst.loaded, table_offset + len(self.mock_recs))
def test_read_end_stream(self, mock_connect): '''reads the entire table from the database and validates that the stream returns None to indicate the end''' inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] result_order = [self.mock_recs, []] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.side_effect = result_order rows = inst.read() self.assertEqual(len(rows), len(self.mock_recs)) empty = inst.read() self.assertEqual(empty, []) end = inst.read() self.assertEqual(end, None)
def test_custom_port(self, m): source = { "addr": "test.database.name:5439/foobar", "user": "******", "password": "******", "tables": [{ 'value': 'schema.foo' }] } inst = Postgres(source, OPTIONS) inst.read() m.assert_called_with(host="test.database.name", port=5439, user=source['user'], password=source['password'], dbname="foobar")
def test_connection_parameters(self, mock_connect): source = { "addr": "test.database:5439/foobar?sslmode=verify-full", "user": "******", "password": "******", "tables": [{ 'value': 'schema.foo' }] } inst = Postgres(source, OPTIONS) inst.read() mock_connect.assert_called_with( dsn="postgres://test.database:5439/foobar?sslmode=verify-full", user=source['user'], password=source['password'], connect_timeout=postgres.source.CONNECT_TIMEOUT)
def test_no_port(self, mock_connect): source = { "addr": "test.database.name/foobar", "user": "******", "password": "******", "tables": [{ 'value': 'schema.foo' }] } inst = Postgres(source, OPTIONS) inst.read() mock_connect.assert_called_with( dsn="postgres://test.database.name/foobar", user=source['user'], password=source['password'], connect_timeout=postgres.source.CONNECT_TIMEOUT)
def test_schema_name(self, mock_connect): '''Test schema name is used when queries and that both schema and table names are wrapped in enclosing quotes''' source = { "addr": "test.database.name/foobar", "user": "******", "password": "******", "tables": [{ 'value': 'schema.foo' }] } inst = Postgres(source, OPTIONS) inst.read() q = 'DECLARE cur CURSOR FOR SELECT * FROM "schema"."foo"' execute_mock = mock_connect.return_value.cursor.return_value.execute execute_mock.assert_has_calls([mock.call(q)], True)
def test_custom_port(self, mock_connect): source = { "addr": "test.database.name:5439/foobar", "user": "******", "password": "******", "tables": [{ 'value': 'schema.foo' }] } inst = Postgres(source, OPTIONS) inst.read() mock_connect.assert_called_with( host="test.database.name", port=5439, user=source['user'], password=source['password'], dbname="foobar", connect_timeout=postgres.source.CONNECT_TIMEOUT)
def test_retry_with_last_values(self, mock_connect, mock_metadata, _): mock_metadata.side_effect = lambda *args: [{ 'attname': 'col1', 'indisunique': True, 'indisprimary': True }, { 'attname': 'col2', 'indisunique': True, 'indisprimary': True }] inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] inst.batch_size = 1 cursor_execute = mock_connect.return_value.cursor.return_value.execute cursor_execute.side_effect = [ lambda *args: None, lambda *args: None, psycopg2.DatabaseError('TestRetriesError'), lambda *args: None, lambda *args: None, ] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.return_value = self.mock_recs # First read no error inst.read() # Raise retry error inst.read() # Extract mock call arguments args = mock_connect.return_value.cursor.return_value\ .execute.call_args_list args = [r[0] for r, _ in args] args = filter(lambda x: 'DECLARE' in x, args) # Second DECLARATION of cursor should start from last row fetched self.assertTrue( 'WHERE ("col1","col2") >= (\'foo3\',\'bar3\')' in args[1])
def test_read_from_other_schema(self, mock_connect, mock_metadata, __): inst = Postgres(self.source, OPTIONS) inst.tables = [{ 'value': 'my_schema.foo_bar' }, { 'value': 'your_schema.bar_foo' }] cursor_return_value = mock_connect.return_value.cursor.return_value mock_data = [self.mock_recs[:1], []] * 2 cursor_return_value.fetchall.side_effect = mock_data expected = [('my_schema', 'foo_bar'), ('my_schema', 'foo_bar'), ('your_schema', 'bar_foo'), ('your_schema', 'bar_foo')] for expected_schema, expected_table in expected: inst.read() _, schema, table = mock_metadata.call_args[0] self.assertEqual(schema, expected_schema) self.assertEqual(table, expected_table)
def test_read(self, mock_connect): '''reads a table from the database and validates that each row has a __tablename and __schemaname column''' inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.return_value = self.mock_recs rows = inst.read() self.assertEqual(len(rows), len(self.mock_recs)) for x in range(0, len(rows)): self.assertEqual(rows[x]['__tablename'], 'foo_bar') self.assertEqual(rows[x]['__schemaname'], 'my_schema')
def test_query_with_no_keys(self, mock_connect, _): inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'my_schema.foo_bar'}] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.side_effect = [ [], [ { 'attname': 'id', 'data_type': 'integer' }, { 'attname': 'name', 'data_type': 'text' }, ], [], ] cursor_execute = mock_connect.return_value.cursor.return_value.execute cursor_execute.return_value = lambda *args: None inst.read() # Extract mock call arguments args = mock_connect.return_value.cursor.return_value \ .execute.call_args_list args = [r[0] for r, _ in args] args = filter(lambda x: 'DECLARE' in x, args) expected = 'DECLARE cur ' \ 'CURSOR FOR SELECT * FROM "my_schema"."foo_bar" ' \ "WHERE (\"inckey\" >= 'incval' AND \"inckey\" <= '100') " \ 'ORDER BY "id","inckey"' self.assertEqual(args[0], expected)
def test_reports_state(self, mock_connect, mock_state): '''before returning a batch of data, the sources state should be reported as well as having the state ID appended to each data object''' inst = Postgres(self.source, OPTIONS) table_name = 'my_schema.foo_bar' inst.tables = [{'value': table_name}] result_order = [self.mock_recs, []] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.side_effect = result_order rows = inst.read() state_id = rows[0]['__state'] state_obj = dict([(table_name, len(self.mock_recs))]) msg = 'State ID is not the same in all rows!' for row in rows: self.assertEqual(row['__state'], state_id, msg) # State function was called with relevant table name and row count mock_state.assert_called_with(state_id, state_obj)
def test_connect_error(self, m): inst = Postgres(self.source, OPTIONS) inst.tables = [{'value': 'schema.foo'}] m.side_effect = psycopg2.OperationalError('Mock Error') with self.assertRaises(PanoplyException): inst.read()
def test_read_end_stream(self, mock_connect, mock_execute, mock_metadata, _): """reads the entire table from the database and validates that the stream returns None to indicate the end""" tables = [ { 'value': 'public.table1' }, { 'value': 'public.table2' }, { 'value': 'public.table3' }, ] mock_metadata.side_effect = [ [{ 'attname': 'col1' }], [{ 'attname': 'col2' }], [{ 'attname': 'col3' }], ] inst = Postgres(self.source, OPTIONS) inst.tables = tables result_order = [ self.mock_recs, [], self.mock_recs, [], self.mock_recs, [] ] cursor_return_value = mock_connect.return_value.cursor.return_value cursor_return_value.fetchall.side_effect = result_order # First call to read result = inst.read() self.assertEqual(len(result), len(self.mock_recs)) query = mock_execute.call_args_list[0][0][0] expected_query = 'FROM "public"."table1" ' \ 'WHERE ("inckey" >= \'incval\' ' \ 'AND "inckey" <= \'100\') '\ 'ORDER BY "col1","inckey"' self.assertTrue(expected_query in query) query = mock_execute.call_args_list[1][0][0] expected_query = 'FETCH FORWARD' self.assertTrue(expected_query in query) # Second call to read result = inst.read() self.assertEqual(result, []) query = mock_execute.call_args_list[2][0][0] expected_query = 'FETCH FORWARD' self.assertTrue(expected_query in query) # Third call to read result = inst.read() self.assertEqual(len(result), len(self.mock_recs)) query = mock_execute.call_args_list[3][0][0] expected_query = 'FROM "public"."table2" ' \ 'WHERE ("inckey" >= \'incval\' ' \ 'AND "inckey" <= \'100\') '\ 'ORDER BY "col2","inckey"' self.assertTrue(expected_query in query) query = mock_execute.call_args_list[4][0][0] expected_query = 'FETCH FORWARD' self.assertTrue(expected_query in query) # Fourth call to read result = inst.read() self.assertEqual(result, []) query = mock_execute.call_args_list[5][0][0] expected_query = 'FETCH FORWARD' self.assertTrue(expected_query in query) # Fifth call to read result = inst.read() self.assertEqual(len(result), len(self.mock_recs)) query = mock_execute.call_args_list[6][0][0] expected_query = 'FROM "public"."table3" ' \ 'WHERE ("inckey" >= \'incval\' ' \ 'AND "inckey" <= \'100\') '\ 'ORDER BY "col3","inckey"' self.assertTrue(expected_query in query) query = mock_execute.call_args_list[7][0][0] expected_query = 'FETCH FORWARD' self.assertTrue(expected_query in query) # Sixth call to read result = inst.read() self.assertEqual(result, []) query = mock_execute.call_args_list[8][0][0] expected_query = 'FETCH FORWARD' self.assertTrue(expected_query in query) end = inst.read() self.assertEqual(end, None)