def fetch_server_id(self):
    with db_utils.get_db_connection(self.get_properties(),
                                    self.get_credentials()).cursor() as cur:
        cur.execute("SELECT @@server_id")
        server_id = cur.fetchone()[0]

        return server_id
def initialize_db(self, engine):
    connection = db_utils.get_db_connection(self.get_properties(), self.get_credentials())

    with connection.cursor() as cur:
        create_databases_sql = """
            DROP DATABASE IF EXISTS {};
            CREATE DATABASE {};
        """.format(self.database_name(), self.database_name())

        cur.execute(create_databases_sql)

        cur.execute(
            """
            SELECT EXISTS (
                SELECT 1
                FROM information_schema.tables
                WHERE table_schema = %s
                AND table_name = %s);""",
            [self.database_name(), self.table_name()])

        existing_table = cur.fetchone()[0]

        if existing_table:
            cur.execute("DROP TABLE {}.{}".format(self.database_name(), self.table_name()))

        create_table_sql = """
            CREATE TABLE {}.{} (
                id       BIGINT PRIMARY KEY,
                our_json JSON
            ) ENGINE = {}
        """.format(self.database_name(), self.table_name(), engine)

        cur.execute(create_table_sql)

        # Ensure expected engine in use
        cur.execute(
            """
            SELECT TABLE_NAME, ENGINE
            FROM information_schema.tables
            WHERE table_schema = %s;""",
            [self.database_name()])

        engine_in_use = cur._result.rows[0][1]
        self.assertEqual(engine, engine_in_use.upper(),
                         msg="Unexpected engine in use: {}".format(engine_in_use))

        for record in [rec_1]:
            self.insert_record(cur, record)

        print("\n\nMySQL DB Instantiated." + \
              "\nNAME: {}\nENGINE: {}".format(self.database_name(), engine_in_use) + \
              "\nTABLE: {}\nEVENTS: 1 record inserted\n\n".format(self.table_name()))
def setUp(self):
    missing_envs = [
        x for x in [
            os.getenv('TAP_MYSQL_HOST'),
            os.getenv('TAP_MYSQL_USER'),
            os.getenv('TAP_MYSQL_PASSWORD'),
            os.getenv('TAP_MYSQL_PORT')
        ] if x is None
    ]
    if len(missing_envs) != 0:
        raise Exception(
            "set TAP_MYSQL_HOST, TAP_MYSQL_USER, TAP_MYSQL_PASSWORD, TAP_MYSQL_PORT"
        )

    print("setting up mysql databases and tables")

    props = self.get_properties()
    props.pop('database')  # Don't connect to a specific database for setup

    connection = db_utils.get_db_connection(props, self.get_credentials())

    with connection.cursor() as cur:
        create_databases_sql = """
            DROP DATABASE IF EXISTS {};
            CREATE DATABASE {};
        """.format(self.database_name(), self.database_name())

        cur.execute(create_databases_sql)

        create_table_sql = """
            CREATE TABLE {}.full_table (
                a_pk      INTEGER AUTO_INCREMENT PRIMARY KEY,
                a_varchar VARCHAR(10));
        """.format(self.database_name())

        cur.execute(create_table_sql)

        create_composite_key_table_sql = """
            CREATE TABLE {}.full_table_composite_key (
                a_pk      INTEGER AUTO_INCREMENT,
                a_varchar VARCHAR(10),
                PRIMARY KEY (a_pk, a_varchar)
            );
        """.format(self.database_name())

        cur.execute(create_composite_key_table_sql)

        for table_name in self.table_names():
            for record in self.dummy_data():
                self.insert_record(cur, record, table_name)
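# The insert_record helper called above is defined elsewhere in the test suite
# and is not shown in this section. The sketch below is an assumption inferred
# from the call sites (cursor, record dict, target table), not the actual
# implementation: it builds a parameterized INSERT from the record's keys.
def insert_record_sketch(self, cursor, record, table_name):
    columns = ', '.join(record.keys())
    placeholders = ', '.join(['%s'] * len(record))
    cursor.execute(
        "INSERT INTO {}.{} ({}) VALUES ({})".format(
            self.database_name(), table_name, columns, placeholders),
        list(record.values()))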
def setUp(self):
    self.maxDiff = None
    missing_envs = [
        x for x in [
            os.getenv('TAP_MYSQL_HOST'),
            os.getenv('TAP_MYSQL_USER'),
            os.getenv('TAP_MYSQL_PASSWORD'),
            os.getenv('TAP_MYSQL_PORT')
        ] if x is None
    ]
    if len(missing_envs) != 0:
        raise Exception(
            "set TAP_MYSQL_HOST, TAP_MYSQL_USER, TAP_MYSQL_PASSWORD, TAP_MYSQL_PORT"
        )

    print("setting up mysql databases and tables")

    connection = db_utils.get_db_connection(self.get_properties(), self.get_credentials())

    with connection.cursor() as cursor:
        flatten = lambda l: [item for sublist in l for item in sublist]
        var_string_for_table = lambda t: ', '.join(['%s'] * len(dummy_data[t][0]))

        create_databases_sql = '''
            DROP DATABASE IF EXISTS tap_tester_mysql_0;
            CREATE DATABASE tap_tester_mysql_0;
            DROP DATABASE IF EXISTS tap_tester_mysql_1;
            CREATE DATABASE tap_tester_mysql_1;
        '''

        cursor.execute(create_databases_sql)

        var_string = var_string_for_table('simple_example')
        simple_example_table_sql = '''
            CREATE TABLE tap_tester_mysql_0.simple_example (
                c_pk      INTEGER PRIMARY KEY,
                c_varchar VARCHAR(255),
                c_dt      DATETIME);

            INSERT INTO tap_tester_mysql_0.simple_example VALUES (%s), (%s), (%s), (%s), (%s);
        ''' % (var_string, var_string, var_string, var_string, var_string)

        cursor.execute(simple_example_table_sql, flatten(dummy_data['simple_example']))
def binlog_json_test(self):
    print("RUNNING {}\n\n".format(self.name()))

    conn_id = connections.ensure_connection(self)

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    expected_check_streams = {self.tap_stream_id()}
    expected_sync_streams = {self.table_name()}
    expected_pks = {self.table_name(): {'id'}}

    # verify the tap discovered the right streams
    found_catalogs = [catalog for catalog in menagerie.get_catalogs(conn_id)
                      if catalog['tap_stream_id'] in expected_check_streams]
    self.assertGreaterEqual(len(found_catalogs), 1,
                            msg="unable to locate schemas for connection {}".format(conn_id))

    found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
    diff = expected_check_streams.symmetric_difference(found_catalog_names)
    self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))

    # verify that persisted streams have the correct properties
    test_catalog = found_catalogs[0]
    self.assertEqual(self.table_name(), test_catalog['stream_name'])
    print("discovered streams are correct")

    additional_md = [{"breadcrumb": [], "metadata": {'replication-method': 'LOG_BASED'}}]
    selected_metadata = connections.select_catalog_and_fields_via_metadata(
        conn_id, test_catalog,
        menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
        additional_md)

    # clear state
    menagerie.set_state(conn_id, {})

    # run initial full table sync
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    self.maxDiff = None
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {self.table_name(): 1})

    records_for_stream = runner.get_records_from_target_output()[self.table_name()]
    messages_for_stream = records_for_stream['messages']
    message_actions = [rec['action'] for rec in messages_for_stream]

    self.assertEqual(message_actions, ['activate_version', 'upsert', 'activate_version'])

    # ensure some log_file and log_pos state was persisted
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertIsNotNone(bookmark['log_file'])
    self.assertIsNotNone(bookmark['log_pos'])

    expected_log_file = bookmark['log_file']
    expected_log_pos = bookmark['log_pos']

    # grab version, log_file and log_pos from state to check later
    expected_table_version = records_for_stream['table_version']
    self.assertEqual(expected_table_version, bookmark['version'])

    # check for expected records
    upsert_records = [m['data'] for m in messages_for_stream if m['action'] == 'upsert']
    self.assertEqual([expected_rec_1], upsert_records)

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that the table version is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertEqual(expected_table_version, bookmark['version'])

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    # record count should be empty as we did not persist anything to the gate
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {})

    # insert a new huge row
    data = dict([('foooo%i' % i, 'baaaaar%i' % i) for i in range(2560)], literal=True)
    rec = {'id': 2, 'our_json': json.dumps(data)}

    with db_utils.get_db_connection(
            self.get_properties(), self.get_credentials()).cursor() as cur:
        self.insert_record(cur, rec)

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that version from state is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertEqual(expected_table_version, bookmark['version'])

    # Either the log_file is the same but the log_pos has increased, or the log_file
    # has rotated and the numeric suffix has increased
    if expected_log_file == bookmark['log_file']:
        self.assertGreater(bookmark['log_pos'], expected_log_pos)
    else:
        expected_log_file_suffix = re.search(r'^.*\.(\d+)$', expected_log_file).groups()[0]
        updated_log_file_suffix = re.search(r'^.*\.(\d+)$', bookmark['log_file']).groups()[0]
        self.assertGreater(int(updated_log_file_suffix), int(expected_log_file_suffix))

    expected_log_file = bookmark['log_file']
    expected_log_pos = bookmark['log_pos']

    expected_rec_2 = copy.deepcopy(rec)

    # check for expected records
    records_for_stream = runner.get_records_from_target_output()[self.table_name()]
    messages_for_stream = records_for_stream['messages']
    message_actions = [rec['action'] for rec in messages_for_stream]

    self.assertEqual(message_actions, ['upsert'])

    upsert_records = [m['data'] for m in messages_for_stream if m['action'] == 'upsert']
    del upsert_records[0]['_sdc_deleted_at']

    expected_json = json.loads(expected_rec_2.get('our_json', {}))
    actual_json = json.loads(upsert_records[0].get('our_json', {}))

    self.assertTrue(len(actual_json.keys()) > 0)
    self.assertEqual(expected_json, actual_json)
def test_run(self):
    conn_id = connections.ensure_connection(self)

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    # verify the tap discovered the right streams
    found_catalogs = [fc for fc in menagerie.get_catalogs(conn_id)
                      if fc['tap_stream_id'] in self.expected_check_streams()]
    self.assertGreater(len(found_catalogs), 0,
                       msg="unable to locate schemas for connection {}".format(conn_id))

    found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
    diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
    self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))

    # verify that persisted streams have the correct properties
    for c in found_catalogs:
        catalog_props_to_check = ['stream_name', 'tap_stream_id']
        stream = c['stream_name']

        for prop in catalog_props_to_check:
            self.assertEqual(
                c[prop], expected_catalogs[stream][prop],
                msg="unexpected stream catalog property `{}` for stream `{}`: `{}` != `{}`".format(
                    prop, stream, expected_catalogs[stream][prop], c[prop]))

    print("discovered streams are correct")

    print('checking discovered metadata for tap_tester_mysql_0-incremental')
    incremental_catalog = [c for c in found_catalogs
                           if c['tap_stream_id'] == 'tap_tester_mysql_0-incremental'][0]
    md = menagerie.get_annotated_schema(conn_id, incremental_catalog['stream_id'])['metadata']

    incremental_stream_metadata = {
        'database-name': 'tap_tester_mysql_0',
        'row-count': 3,
        'is-view': False,
        'selected-by-default': False,
        'table-key-properties': ['c_pk']
    }

    self.assertEqual(
        sorted(md, key=lambda x: x['breadcrumb']),
        [{'breadcrumb': [], 'metadata': incremental_stream_metadata},
         {'breadcrumb': ['properties', 'c_dt'],
          'metadata': {'selected-by-default': True, 'sql-datatype': 'datetime'}},
         {'breadcrumb': ['properties', 'c_pk'],
          'metadata': {'selected-by-default': True, 'sql-datatype': 'int(11)'}},
         {'breadcrumb': ['properties', 'c_varchar'],
          'metadata': {'selected-by-default': True, 'sql-datatype': 'varchar(255)'}},
         {'breadcrumb': ['properties', 'c_varchar_to_deselect'],
          'metadata': {'selected-by-default': True, 'sql-datatype': 'varchar(255)'}}])

    print('checking discovered metadata for tap_tester_mysql_1-view')
    view_catalog = [c for c in found_catalogs
                    if c['tap_stream_id'] == 'tap_tester_mysql_1-view'][0]

    view_catalog_key_properties_md = [{'breadcrumb': [],
                                       'metadata': {'view-key-properties': ['c_pk']}}]

    connections.set_non_discoverable_metadata(
        conn_id, view_catalog,
        menagerie.get_annotated_schema(conn_id, view_catalog['stream_id']),
        view_catalog_key_properties_md)
    md = menagerie.get_annotated_schema(conn_id, view_catalog['stream_id'])['metadata']

    view_stream_metadata = {
        'database-name': 'tap_tester_mysql_1',
        'is-view': True,
        'selected-by-default': False,
        'view-key-properties': ['c_pk']
    }

    self.assertEqual(
        sorted(md, key=lambda x: x['breadcrumb']),
        [{'breadcrumb': [], 'metadata': view_stream_metadata},
         {'breadcrumb': ['properties', 'c_pk'],
          'metadata': {'selected-by-default': True, 'sql-datatype': 'int(11)'}},
         {'breadcrumb': ['properties', 'c_varchar'],
          'metadata': {'selected-by-default': True, 'sql-datatype': 'varchar(255)'}}])

    # No selected-by-default MD for c_year because it is an unsupported type
    various_types_catalog = [c for c in found_catalogs
                             if c['tap_stream_id'] == 'tap_tester_mysql_0-various_types'][0]
    md = menagerie.get_annotated_schema(conn_id, various_types_catalog['stream_id'])['metadata']
    c_year_md = [x for x in md if x['breadcrumb'] == ['properties', 'c_year']]

    self.assertEqual(c_year_md,
                     [{'breadcrumb': ['properties', 'c_year'],
                       'metadata': {'selected-by-default': False, 'sql-datatype': 'year(4)'}}])

    # select_simple_example
    catalogs_to_select = [c for c in found_catalogs
                          if c['tap_stream_id'] != 'tap_tester_mysql_0-simple_example']

    for a_catalog in catalogs_to_select:
        additional_md = []
        unselected_fields = []
        if a_catalog['tap_stream_id'] == 'tap_tester_mysql_0-incremental':
            additional_md = [{"breadcrumb": [],
                              "metadata": {'replication-key': 'c_dt',
                                           'replication-method': 'INCREMENTAL'}}]
            unselected_fields = ['c_varchar_to_deselect']
        elif a_catalog['tap_stream_id'] == 'tap_tester_mysql_1-view':
            additional_md = [{"breadcrumb": [],
                              "metadata": {'view-key-properties': ['c_pk'],
                                           'replication-method': 'FULL_TABLE'}}]
        else:
            additional_md = [{"breadcrumb": [],
                              "metadata": {'replication-method': 'FULL_TABLE'}}]

        selected_metadata = connections.select_catalog_and_fields_via_metadata(
            conn_id, a_catalog,
            menagerie.get_annotated_schema(conn_id, a_catalog['stream_id']),
            additional_md, unselected_fields)

    # clear state
    menagerie.set_state(conn_id, {})

    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    replicated_row_count = reduce(lambda accum, c: accum + c, record_count_by_stream.values())
    expected_row_count = 8  # {'my_isam': 1, 'various_types': 3, 'incremental': 3, 'view': 1}
    self.assertEqual(replicated_row_count, expected_row_count,
                     msg="failed to replicate correct number of rows: {}".format(record_count_by_stream))
    print("total replicated row count: {}".format(replicated_row_count))

    records_by_stream = runner.get_records_from_target_output()

    # verifications about individual records
    for stream, recs in records_by_stream.items():
        # verify that activate version messages were sent in the proper position
        self.assertEqual(
            recs['messages'][0]['action'], 'activate_version',
            msg="Expected first message sent for stream `{}` to have action `activate_version`".format(stream))

        # verify the persisted schema was correct
        self.assertEqual(
            recs['schema'], expected_schemas[stream],
            msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    # verify that the target output the proper numeric and date representations
    expected_various_types_records = [
        {'c_time': '1970-01-01T12:34:56.000000Z',
         'c_mediumint': 8388607,
         'c_smallint': 32767,
         'c_tinyint': 127,
         'c_date': '2017-09-13T00:00:00.000000Z',
         'c_bigint': 9223372036854775807,
         'c_decimal': -1,
         'c_int': 2147483647,
         'c_bit': True,
         'c_decimal_2': Decimal('123456789.0'),
         'c_pk': 1,
         'c_double': Decimal("1.234"),
         'c_float': Decimal("1.234"),
         'c_decimal_2_unsigned': Decimal("1.23"),
         'c_tinyint_1': True},
        {'c_time': '1970-01-01T12:34:57.000000Z',
         'c_mediumint': -8388608,
         'c_smallint': -32768,
         'c_tinyint': -128,
         'c_date': '2017-09-14T00:00:00.000000Z',
         'c_bigint': -9223372036854775808,
         'c_decimal': 0,
         'c_int': -2147483648,
         'c_bit': False,
         'c_decimal_2': Decimal("123456790.0"),
         'c_pk': 2,
         'c_double': Decimal("2.234"),
         'c_float': Decimal("2.234"),
         'c_decimal_2_unsigned': Decimal("0.23"),
         'c_tinyint_1': False},
        {'c_time': '1970-01-01T12:34:57.000000Z',
         'c_mediumint': -8388608,
         'c_smallint': -32768,
         'c_tinyint': -128,
         'c_date': '2017-09-14T00:00:00.000000Z',
         'c_bigint': -9223372036854775808,
         'c_decimal': 0,
         'c_int': -2147483648,
         'c_bit': None,
         'c_decimal_2': Decimal("123456790.0"),
         'c_pk': 3,
         'c_double': Decimal("2.234"),
         'c_float': Decimal("2.234"),
         'c_decimal_2_unsigned': Decimal("0.23"),
         'c_tinyint_1': None}
    ]

    actual_various_types_records = [
        r['data'] for r in records_by_stream['various_types']['messages'][1:4]]

    self.assertEqual(
        actual_various_types_records, expected_various_types_records,
        msg="Expected `various_types` upsert record data to be {}, but target output {}".format(
            expected_various_types_records, actual_various_types_records))

    # verify that deselected property was not output
    expected_incremental_record = {
        'c_pk': 1,
        'c_dt': '2017-01-01T00:00:00.000000Z',
        'c_varchar': 'a'
    }

    actual_incremental_record = records_by_stream['incremental']['messages'][1]['data']

    self.assertEqual(
        actual_incremental_record, expected_incremental_record,
        msg="Expected first `incremental` upsert record data to be {}, but target output {}".format(
            expected_incremental_record, actual_incremental_record))

    print("records are correct")

    # verify state and bookmarks
    state = menagerie.get_state(conn_id)
    bookmarks = state['bookmarks']

    self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")

    for k, v in bookmarks.items():
        if k == 'tap_tester_mysql_0-incremental':
            self.assertIsNotNone(
                v['version'],
                msg="expected bookmark for stream `{}` to have a version set".format(k))
            self.assertEqual(
                v['replication_key_value'], '2017-01-01T00:00:02.000000Z',
                msg="incorrect replication_key_value in bookmark for stream `{}`".format(k))
            self.assertEqual(
                v['replication_key'], 'c_dt',
                msg="incorrect replication_key specified in bookmark for stream `{}`".format(k))
        else:
            self.assertFalse(
                'version' in v,
                msg="expected bookmark for stream `{}` to not have a version key".format(k))
            self.assertTrue(
                'initial_full_table_complete' in v,
                msg="expected bookmark for stream `{}` to have a true initial_full_table_complete key".format(k))

    print("state and bookmarks are correct")

    incremental_table_initial_table_version = bookmarks['tap_tester_mysql_0-incremental']['version']

    #----------------------------------------------------------------------
    # invoke the sync job again after some modifications
    #----------------------------------------------------------------------

    print("adding a column to an existing table in the source db")
    connection = db_utils.get_db_connection(self.get_properties(), self.get_credentials())

    with connection.cursor() as cursor:
        add_column_sql = '''
            ALTER TABLE tap_tester_mysql_0.incremental
              ADD COLUMN favorite_number INTEGER;
            INSERT INTO tap_tester_mysql_0.incremental VALUES (4, '4', '2017-01-01 00:00:03', 'yeehaw', 999);
        '''
        cursor.execute(add_column_sql)

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    # verify the tap discovered the right streams
    found_catalogs = [fc for fc in menagerie.get_catalogs(conn_id)
                      if fc['tap_stream_id'] in self.expected_check_streams()]
    self.assertGreater(len(found_catalogs), 0,
                       msg="unable to locate schemas for connection {}".format(conn_id))

    found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
    diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
    self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))

    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    replicated_row_count = reduce(lambda accum, c: accum + c, record_count_by_stream.values())
    expected_row_count = 7  # {'my_isam': 1, 'various_types': 3, 'incremental': 2, 'view': 1}
    self.assertEqual(replicated_row_count, expected_row_count,
                     msg="failed to replicate correct number of rows: {}".format(record_count_by_stream))
    print("total replicated row count: {}".format(replicated_row_count))

    records_by_stream = runner.get_records_from_target_output()

    expected_schema_of_new_column = {
        'maximum': 2147483647,
        'selected': True,
        'inclusion': 'available',
        'type': ['null', 'integer'],
        'minimum': -2147483648
    }

    # verifications about individual records
    for stream, recs in records_by_stream.items():
        # verify that activate version messages were sent in the proper position
        if stream == 'incremental':
            self.assertEqual(
                records_by_stream[stream]['messages'][0]['action'], 'activate_version',
                msg="Expected first message sent for stream `{}` to have action `activate_version`".format(stream))
            expected_schema_of_new_column = {
                'maximum': 2147483647,
                'inclusion': 'available',
                'type': ['null', 'integer'],
                'minimum': -2147483648
            }
            self.assertEqual(
                records_by_stream[stream]['schema']['properties']['favorite_number'],
                expected_schema_of_new_column,
                msg="Expected newly-added column to be present in schema for stream `{}`, but it was not.".format(stream))
        else:
            self.assertEqual(
                records_by_stream[stream]['messages'][0]['action'], 'upsert',
                msg="Expected first message sent for stream `{}` to have action `upsert`".format(stream))
            self.assertEqual(
                records_by_stream[stream]['messages'][-1]['action'], 'activate_version',
                msg="Expected last message sent for stream `{}` to have action `activate_version`".format(stream))

    state = menagerie.get_state(conn_id)
    bookmarks = state['bookmarks']

    self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")

    for k, v in bookmarks.items():
        if k == 'tap_tester_mysql_0-incremental':
            self.assertIsNotNone(
                v['version'],
                msg="expected bookmark for stream `{}` to have a version set".format(k))
            self.assertEqual(
                v['replication_key_value'], '2017-01-01T00:00:03.000000Z',
                msg="incorrect replication_key_value in bookmark for stream `{}`".format(k))
            self.assertEqual(
                v['replication_key'], 'c_dt',
                msg="incorrect replication_key specified in bookmark for stream `{}`".format(k))
        else:
            self.assertFalse(
                'version' in v,
                msg="expected bookmark for stream `{}` to not have a version key".format(k))
            self.assertTrue(
                'initial_full_table_complete' in v,
                msg="expected bookmark for stream `{}` to have a true initial_full_table_complete key".format(k))

    print("state and bookmarks are correct")

    # verify incremental table_version didn't change
    incremental_table_new_table_version = bookmarks['tap_tester_mysql_0-incremental']['version']
    self.assertEqual(
        incremental_table_initial_table_version,
        incremental_table_new_table_version,
        msg="Expected incrementally-replicated table's table_version to remain unchanged over multiple invocations.")
def setUp(self):
    self.maxDiff = None
    missing_envs = [
        x for x in [
            os.getenv('TAP_MYSQL_HOST'),
            os.getenv('TAP_MYSQL_USER'),
            os.getenv('TAP_MYSQL_PASSWORD'),
            os.getenv('TAP_MYSQL_PORT')
        ] if x is None
    ]
    if len(missing_envs) != 0:
        raise Exception(
            "set TAP_MYSQL_HOST, TAP_MYSQL_USER, TAP_MYSQL_PASSWORD, TAP_MYSQL_PORT"
        )

    print("setting up mysql databases and tables")

    connection = db_utils.get_db_connection(self.get_properties(), self.get_credentials())

    with connection.cursor() as cursor:
        flatten = lambda l: [item for sublist in l for item in sublist]
        var_string_for_table = lambda t: ', '.join(['%s'] * len(dummy_data[t][0]))

        create_databases_sql = '''
            DROP DATABASE IF EXISTS tap_tester_mysql_0;
            CREATE DATABASE tap_tester_mysql_0;
            DROP DATABASE IF EXISTS tap_tester_mysql_1;
            CREATE DATABASE tap_tester_mysql_1;
        '''

        cursor.execute(create_databases_sql)

        simple_example_table_sql = '''
            CREATE TABLE tap_tester_mysql_0.simple_example (
                c_pk      INTEGER PRIMARY KEY,
                c_varchar VARCHAR(255));

            INSERT INTO tap_tester_mysql_0.simple_example VALUES (%s);
        ''' % var_string_for_table('simple_example')

        cursor.execute(simple_example_table_sql, flatten(dummy_data['simple_example']))

        var_string = var_string_for_table('various_types')
        various_types_table_sql = '''
            CREATE TABLE tap_tester_mysql_0.various_types (
                c_pk                 INTEGER PRIMARY KEY,
                c_decimal            DECIMAL,
                c_decimal_2_unsigned DECIMAL(5, 2) UNSIGNED,
                c_decimal_2          DECIMAL(11, 2),
                c_tinyint            TINYINT,
                c_smallint           SMALLINT,
                c_mediumint          MEDIUMINT,
                c_int                INT,
                c_bigint             BIGINT,
                c_float              FLOAT,
                c_double             DOUBLE,
                c_bit                BIT(4),
                c_date               DATE,
                c_time               TIME,
                c_year               YEAR,
                c_tinyint_1          TINYINT(1));

            INSERT INTO tap_tester_mysql_0.various_types VALUES (%s), (%s), (%s);
        ''' % (var_string, var_string, var_string)

        cursor.execute(various_types_table_sql, flatten(dummy_data['various_types']))

        var_string = var_string_for_table('incremental')
        incremental_table_sql = '''
            CREATE TABLE tap_tester_mysql_0.incremental (
                c_pk                  INTEGER PRIMARY KEY,
                c_varchar             VARCHAR(255),
                c_dt                  DATETIME,
                c_varchar_to_deselect VARCHAR(255));

            INSERT INTO tap_tester_mysql_0.incremental VALUES (%s), (%s), (%s);
        ''' % (var_string, var_string, var_string)

        cursor.execute(incremental_table_sql, flatten(dummy_data['incremental']))

        var_string = var_string_for_table('simple_example')
        isam_table_sql = '''
            CREATE TABLE tap_tester_mysql_1.my_isam (
                c_pk      INTEGER PRIMARY KEY,
                c_varchar VARCHAR(255))
            ENGINE = MYISAM;

            INSERT INTO tap_tester_mysql_1.my_isam VALUES (%s);
        ''' % var_string_for_table('simple_example')

        cursor.execute(isam_table_sql, flatten(dummy_data['simple_example']))

        view_sql = '''
            CREATE VIEW tap_tester_mysql_1.view AS
              SELECT * FROM tap_tester_mysql_0.simple_example;
        '''

        cursor.execute(view_sql)
def binlog_edge_test(self, expected_records=None):
    """
    Test binlog replication edge cases
    • Verify an initial sync returns expected records of various datatypes
    • Verify we bookmark correctly when a transaction spans multiple files
    • Insert and delete a record prior to sync. Verify both events are replicated
    • Insert and update a record prior to sync. Verify both events are replicated
    • Verify a valid log_file and log_pos state are persisted after each sync
    """
    # avoid a shared mutable default argument
    if expected_records is None:
        expected_records = []

    conn_id = connections.ensure_connection(self)

    # prior to first sync update a record...
    updated_timestamp = datetime.datetime.now()
    updated_id = 1
    expected_records[1]['our_timestamp_2'] = datetime.datetime.strftime(
        updated_timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")

    # insert a record and...
    inserted_record = self.generate_record_n(len(expected_records))
    expected_records += [inserted_record]  # TODO need to format

    # delete a record
    deleted_id = 2

    with db_utils.get_db_connection(
            self.get_properties(), self.get_credentials()).cursor() as cur:
        cur.execute("UPDATE {}.{} SET our_timestamp_2 = '{}' WHERE id = {}".format(
            self.database_name(), self.table_name_1(), updated_timestamp, updated_id))

        self.insert_record(cur, inserted_record, self.table_name_1())

        delete_time = datetime.datetime.now()
        cur.execute("DELETE FROM {}.{} WHERE id = {}".format(
            self.database_name(), self.table_name_1(), deleted_id))

    print(
        "\n\nMySQL DB Actions." + \
        "\nNAME: {}\nTABLE: {}".format(self.database_name(), self.table_name_1()) + \
        "\nEVENTS: {} records updated".format(1) + \
        "\n {} records deleted\n\n".format(1)
    )

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    t1 = self.table_name_1()
    t2 = self.table_name_2()
    expected_check_streams = {self.tap_stream_id(t1), self.tap_stream_id(t2)}
    expected_sync_streams = {t1, t2}
    expected_pks = {t1: {'id'}, t2: {'id'}}

    # verify the tap discovered the right streams
    found_catalogs = [catalog for catalog in menagerie.get_catalogs(conn_id)
                      if catalog['tap_stream_id'] in expected_check_streams]
    self.assertGreaterEqual(len(found_catalogs), 1,
                            msg="unable to locate schemas for connection {}".format(conn_id))

    found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
    diff = expected_check_streams.symmetric_difference(found_catalog_names)
    self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))

    # verify that persisted streams have the correct properties
    self.assertEqual(self.table_name_1(), found_catalogs[0]['stream_name'])
    self.assertEqual(self.table_name_2(), found_catalogs[1]['stream_name'])
    print("discovered streams are correct")

    additional_md = [{"breadcrumb": [], "metadata": {'replication-method': 'LOG_BASED'}}]
    for catalog in found_catalogs:
        schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
        _ = connections.select_catalog_and_fields_via_metadata(
            conn_id, catalog, schema, additional_md)

    # clear state
    menagerie.set_state(conn_id, {})

    # run initial full table sync
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    self.maxDiff = None
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    # BUG missing deleted record | https://stitchdata.atlassian.net/browse/SRCE-4258
    # self.assertEqual({self.table_name_1(): len(expected_records)}, record_count_by_stream)

    records_for_stream = runner.get_records_from_target_output()[self.table_name_1()]
    messages_for_stream = records_for_stream['messages']
    message_actions = [rec['action'] for rec in messages_for_stream]

    # verify activate version messages are present
    self.assertEqual('activate_version', message_actions[0])
    self.assertEqual('activate_version', message_actions[-1])

    # ensure some log_file and log_pos state was persisted
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id(t1)]
    self.assertIsNotNone(bookmark['log_file'])
    self.assertIsNotNone(bookmark['log_pos'])

    expected_log_file = bookmark['log_file']
    expected_log_pos = bookmark['log_pos']

    # grab version, log_file and log_pos from state to check later
    expected_table_version = records_for_stream['table_version']
    self.assertEqual(expected_table_version, bookmark['version'])

    # check for expected records
    upsert_records = [m['data'] for m in messages_for_stream if m['action'] == 'upsert']

    # we need to compare record by record since there are so many.
    # a failure comparing expected_records to upsert_records would result in
    # an output message greater in length than a standard tmux buffer
    # BUG missing datetime precision | https://stitchdata.atlassian.net/browse/SRCE-4257
    # for expected_record in expected_records:
    #     upsert_record = [rec for rec in upsert_records
    #                      if rec['id'] == expected_record['id']]
    #     self.assertEqual(1, len(upsert_record),
    #                      msg="multiple upsert_recs with same pk: {}".format(upsert_record))
    #     self.assertEqual(expected_record, upsert_record.pop())

    # TODO add check for _sdc_delete_at for deleted record once bug addressed

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that the table version is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id(t1)]
    self.assertEqual(expected_table_version, bookmark['version'])

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    # record count should be empty as we did not persist anything to the gate
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {})

    # Create 1 more record prior to 2nd sync
    new_record = self.generate_record_n(len(expected_records))
    with db_utils.get_db_connection(
            self.get_properties(), self.get_credentials()).cursor() as cur:
        self.insert_record(cur, new_record, self.table_name_1())

    print(
        "\n\nMySQL DB Actions." + \
        "\nNAME: {}\nTABLE: {}".format(self.database_name(), self.table_name_1()) + \
        "\nEVENTS: {} records inserted".format(1)
    )

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that version from state is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id(t1)]
    self.assertEqual(expected_table_version, bookmark['version'])

    # Either the log_file is the same but the log_pos has increased, or the log_file
    # has rotated and the numeric suffix has increased
    if expected_log_file == bookmark['log_file']:
        print("PATH A")
        self.assertGreater(bookmark['log_pos'], expected_log_pos)
    else:
        expected_log_file_suffix = re.search(r'^.*\.(\d+)$', expected_log_file).groups()[0]
        updated_log_file_suffix = re.search(r'^.*\.(\d+)$', bookmark['log_file']).groups()[0]
        print("PATH B")
        self.assertGreater(int(updated_log_file_suffix), int(expected_log_file_suffix))

    # Execute delete across tables using join prior to 3rd sync
    deleted_id = 4
    with db_utils.get_db_connection(
            self.get_properties(), self.get_credentials()).cursor() as cur:
        delete_time = datetime.datetime.now()

        # DELETE T1, T2
        # FROM T1
        # INNER JOIN T2 ON T1.key = T2.key
        # WHERE condition;
        db = self.database_name()
        db_t1 = db + "." + t1
        db_t2 = db + "." + t2
        t1_key = db_t1 + ".id"
        t2_key = db_t2 + ".id"
        statement = "DELETE {}, {} ".format(db_t1, db_t2) + \
                    "FROM {} ".format(t1) + \
                    "INNER JOIN {} ON {} = {} ".format(db_t2, t1_key, t2_key) + \
                    "WHERE {} = {}".format(t1_key, deleted_id)
        cur.execute(statement)

    print(
        "\n\nMySQL DB Actions." + \
        "\nNAME: {}\nTABLE: {}".format(self.database_name(), self.table_name_1()) + \
        "\nTABLE: {}".format(self.table_name_2()) + \
        "\nEVENTS: {} records deleted\n\n".format(1)
    )

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that version from state is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id(t1)]
    self.assertEqual(expected_table_version, bookmark['version'])

    target_records = runner.get_records_from_target_output()
    records_stream_1 = target_records[self.table_name_1()]
    upsert_records_1 = [m['data'] for m in records_stream_1['messages']
                        if m['action'] == 'upsert']
    records_stream_2 = target_records[self.table_name_2()]
    upsert_records_2 = [m['data'] for m in records_stream_2['messages']
                        if m['action'] == 'upsert']

    # make sure the record is in the target for both tables with a delete time
    deleted_at_t1 = upsert_records_1[0].get('_sdc_deleted_at')
    deleted_at_t1_timestamp = utils.strptime_to_utc(deleted_at_t1).timestamp()
    self.assertIsNotNone(deleted_at_t1)

    deleted_at_t2 = upsert_records_2[0].get('_sdc_deleted_at')
    deleted_at_t2_timestamp = utils.strptime_to_utc(deleted_at_t2).timestamp()
    self.assertIsNotNone(deleted_at_t2)

    # the delete times should be equal since it was a single transaction
    self.assertEqual(deleted_at_t1_timestamp, deleted_at_t2_timestamp)

    time_delta = delete_time.timestamp() - deleted_at_t1_timestamp
    print("Delete time vs record: difference in seconds", time_delta)
    self.assertLess(time_delta, 3)  # time delta less than 3 seconds in magnitude
def initialize_db(self, engine, log_file_size):
    connection = db_utils.get_db_connection(self.get_properties(), self.get_credentials())

    with connection.cursor() as cur:
        create_databases_sql = """
            DROP DATABASE IF EXISTS {};
            CREATE DATABASE {}
        """.format(self.database_name(), self.database_name())

        cur.execute(create_databases_sql)

        for table_name in [self.table_name_1(), self.table_name_2()]:
            cur.execute(
                """
                SELECT EXISTS (
                    SELECT 1
                    FROM information_schema.tables
                    WHERE table_schema = %s
                    AND table_name = %s);""",
                [self.database_name(), table_name])

            existing_table = cur.fetchone()[0]

            if existing_table:
                cur.execute("DROP TABLE {}.{}".format(self.database_name(), table_name))

            create_table_sql = """
                CREATE TABLE {}.{} (
                    id              BIGINT PRIMARY KEY,
                    our_timestamp_1 TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    our_timestamp_2 TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    our_varchar_1   VARCHAR(255),
                    our_varchar_2   VARCHAR(255)
                ) ENGINE = {}
            """.format(self.database_name(), table_name, engine)

            cur.execute(create_table_sql)

        # Ensure expected engine in use
        cur.execute(
            """
            SELECT TABLE_NAME, ENGINE
            FROM information_schema.tables
            WHERE table_schema = %s;""",
            [self.database_name()])

        engine_in_use = cur._result.rows[0][1]
        self.assertEqual(engine, engine_in_use.upper(),
                         msg="Unexpected engine in use: {}".format(engine_in_use))

        # Ensure expected log file size in use
        if log_file_size is not None:
            cur.execute("""
                SHOW VARIABLES LIKE 'innodb_log_file_size';
            """)
            log_file_size_in_use = cur._result.rows[0][1]
            self.assertEqual(log_file_size, log_file_size_in_use,
                             msg="Unexpected log file size in use: {}".format(log_file_size_in_use))

        # Change innodb_log_file_size = 3 MB (3145728)
        # the default page size is 16384 (16KB) which has a min file size req of 3 MB
        # Timestamp is 4 bytes
        # 2 Timestamps + 2 varchar(255) ~= 518 bytes
        # 50331648 / 518 = 97165
        n_1 = 98000
        print("Generating {} records.".format(n_1))
        expected_records, records = self.create_n_records(n_1)

        print("Inserting {} records in table {}.".format(n_1, self.table_name_1()))
        inc = 0
        for record in records:
            self.insert_record(cur, record, self.table_name_1())

            # we are inserting many records, show some output for tester's sanity
            inc += 1
            if inc % 1000 == 0 and inc < 90001:
                s = "{}%".format(int(100 * inc / n_1)) if inc % 9000 == 0 else "."
                print(s, sep=' ', end='', flush=True)
        print("")  # breakline

        n_2 = 5  # first five records from table 1
        print("Inserting {} records in table {}.".format(n_2, self.table_name_2()))
        for i in range(n_2):
            self.insert_record(cur, records[i], self.table_name_2())

        print("\n\nMySQL DB Instantiated." + \
              "\nNAME: {}\nENGINE: {}".format(self.database_name(), engine_in_use) + \
              "\nTABLE: {}\nEVENTS: {} records inserted".format(self.table_name_1(), n_1) + \
              "\nTABLE: {}\nEVENTS: {} records inserted\n\n".format(self.table_name_2(), n_2))

    return expected_records
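# For context on n_1 = 98000 above: this reproduces the sizing arithmetic from
# the comments in initialize_db, using the figures stated there (a 50331648-byte
# log file and roughly 518 bytes per row). Illustrative sketch only; the real
# values come from the database configuration, not from these constants.
LOG_FILE_SIZE_BYTES = 50331648    # log file size referenced in the comment above
APPROX_ROW_SIZE_BYTES = 518       # 2 TIMESTAMPs + 2 VARCHAR(255), per the comment
ROWS_TO_FILL_LOG = LOG_FILE_SIZE_BYTES // APPROX_ROW_SIZE_BYTES  # 97165
assert 98000 > ROWS_TO_FILL_LOG   # n_1 is rounded up past this threshold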
def binlog_test(self):
    """
    Test binlog replication
    • Verify an initial sync returns expected records of various datatypes
    • Verify no changes and a subsequent sync results in no replicated records
    • Update, Delete, and Insert records then verify the next sync captures these changes
    • Verify some log_file and log_pos state was persisted after each sync
    """
    print("RUNNING {}\n\n".format(self.name()))

    conn_id = connections.ensure_connection(self)

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    expected_check_streams = {self.tap_stream_id()}
    expected_sync_streams = {self.table_name()}
    expected_pks = {self.table_name(): {'id'}}

    # verify the tap discovered the right streams
    found_catalogs = [catalog for catalog in menagerie.get_catalogs(conn_id)
                      if catalog['tap_stream_id'] in expected_check_streams]
    self.assertGreaterEqual(len(found_catalogs), 1,
                            msg="unable to locate schemas for connection {}".format(conn_id))

    found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
    diff = expected_check_streams.symmetric_difference(found_catalog_names)
    self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))

    # verify that persisted streams have the correct properties
    test_catalog = found_catalogs[0]
    self.assertEqual(self.table_name(), test_catalog['stream_name'])
    print("discovered streams are correct")

    additional_md = [{"breadcrumb": [], "metadata": {'replication-method': 'LOG_BASED'}}]
    selected_metadata = connections.select_catalog_and_fields_via_metadata(
        conn_id, test_catalog,
        menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
        additional_md)

    # clear state
    menagerie.set_state(conn_id, {})

    # run initial full table sync
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    self.maxDiff = None
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {self.table_name(): 2})

    records_for_stream = runner.get_records_from_target_output()[self.table_name()]
    messages_for_stream = records_for_stream['messages']
    message_actions = [rec['action'] for rec in messages_for_stream]

    self.assertEqual(message_actions,
                     ['activate_version', 'upsert', 'upsert', 'activate_version'])

    # ensure some log_file and log_pos state was persisted
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertIsNotNone(bookmark['log_file'])
    self.assertIsNotNone(bookmark['log_pos'])

    expected_log_file = bookmark['log_file']
    expected_log_pos = bookmark['log_pos']

    # grab version, log_file and log_pos from state to check later
    expected_table_version = records_for_stream['table_version']
    self.assertEqual(expected_table_version, bookmark['version'])

    # check for expected records
    upsert_records = [m['data'] for m in messages_for_stream if m['action'] == 'upsert']
    self.assertEqual([expected_rec_1, expected_rec_2], upsert_records)

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that the table version is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertEqual(expected_table_version, bookmark['version'])

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    # record count should be empty as we did not persist anything to the gate
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {})

    # run some inserts, updates, and deletes in source
    updated_rec_1_varchar = 'THIS HAS BEEN UPDATED'

    with db_utils.get_db_connection(
            self.get_properties(), self.get_credentials()).cursor() as cur:
        cur.execute("UPDATE {}.{} SET our_varchar = '{}' WHERE id = {}".format(
            self.database_name(), self.table_name(), updated_rec_1_varchar, rec_1['id']))

        delete_time = datetime.datetime.now()
        cur.execute("DELETE FROM {}.{} WHERE id = {}".format(
            self.database_name(), self.table_name(), rec_2['id']))

        self.insert_record(cur, rec_3)

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that version from state is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertEqual(expected_table_version, bookmark['version'])

    # Either the log_file is the same but the log_pos has increased, or the log_file
    # has rotated and the numeric suffix has increased
    if expected_log_file == bookmark['log_file']:
        print("PATH A")
        self.assertGreater(bookmark['log_pos'], expected_log_pos)
    else:
        expected_log_file_suffix = re.search(r'^.*\.(\d+)$', expected_log_file).groups()[0]
        updated_log_file_suffix = re.search(r'^.*\.(\d+)$', bookmark['log_file']).groups()[0]
        print("PATH B")
        self.assertGreater(int(updated_log_file_suffix), int(expected_log_file_suffix))

    expected_log_file = bookmark['log_file']
    expected_log_pos = bookmark['log_pos']

    updated_expected_rec_1 = copy.deepcopy(expected_rec_1)
    updated_expected_rec_2 = copy.deepcopy(expected_rec_2)
    updated_expected_rec_3 = copy.deepcopy(expected_rec_3)

    updated_expected_rec_1['our_varchar'] = updated_rec_1_varchar

    # Floats that come back from binlog provide more precision
    # than from SELECT based queries
    updated_expected_rec_1['our_unsigned_float'] = Decimal("1.2345000505447388")
    updated_expected_rec_1['our_signed_float'] = -Decimal("1.2345000505447388")
    # updated_expected_rec_1['_sdc_deleted_at'] = None
    updated_expected_rec_2['our_unsigned_float'] = Decimal("2.4690001010894775")
    updated_expected_rec_2['our_signed_float'] = -Decimal("2.4690001010894775")
    # updated_expected_rec_2['_sdc_deleted_at'] = None
    # updated_expected_rec_3['_sdc_deleted_at'] = None

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    # check for expected records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {self.table_name(): 3})

    records_for_stream = runner.get_records_from_target_output()[self.table_name()]
    messages_for_stream = records_for_stream['messages']
    message_actions = [rec['action'] for rec in messages_for_stream]

    self.assertEqual(message_actions, ['upsert', 'upsert', 'upsert'])

    upsert_records = [m['data'] for m in messages_for_stream if m['action'] == 'upsert']

    deleted_at_rec = upsert_records[1].get('_sdc_deleted_at')
    deleted_at_rec_timestamp = utils.strptime_to_utc(deleted_at_rec).timestamp()
    time_delta = delete_time.timestamp() - deleted_at_rec_timestamp
    print("Delete time vs record: difference in seconds", time_delta)
    self.assertIsNotNone(deleted_at_rec)
    self.assertLess(time_delta, 3)  # recorded delete time should be within 3 seconds

    # since we don't know exactly what the _sdc_deleted_at value will be
    # we will make the assertions we can make on that field here
    # and then remove it from all records prior to doing a full
    # record-level comparison
    self.assertIn('_sdc_deleted_at', upsert_records[0])
    self.assertIn('_sdc_deleted_at', upsert_records[1])
    self.assertIn('_sdc_deleted_at', upsert_records[2])
    self.assertIsNone(upsert_records[0].get('_sdc_deleted_at'))
    self.assertIsNotNone(upsert_records[1].get('_sdc_deleted_at'))
    self.assertIsNone(upsert_records[2].get('_sdc_deleted_at'))

    del upsert_records[0]['_sdc_deleted_at']
    del upsert_records[1]['_sdc_deleted_at']
    del upsert_records[2]['_sdc_deleted_at']

    self.assertEqual([updated_expected_rec_1, updated_expected_rec_2, updated_expected_rec_3],
                     upsert_records)

    # run binlog sync
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # check that version from state is unchanged
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][self.tap_stream_id()]
    self.assertEqual(expected_table_version, bookmark['version'])

    # verify the persisted schema was correct
    records_by_stream = runner.get_records_from_target_output()
    self.maxDiff = None
    for stream, recs in records_by_stream.items():
        self.assertEqual(recs['schema'], expected_schemas[stream],
                         msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))

    # record count should be empty as we did not persist anything to the gate
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, expected_sync_streams, expected_pks)
    self.assertEqual(record_count_by_stream, {})
def initialize_db(self, engine):
    connection = db_utils.get_db_connection(self.get_properties(), self.get_credentials())

    with connection.cursor() as cur:
        create_databases_sql = """
            DROP DATABASE IF EXISTS {};
            CREATE DATABASE {};
        """.format(self.database_name(), self.database_name())

        cur.execute(create_databases_sql)

        cur.execute(
            """
            SELECT EXISTS (
                SELECT 1
                FROM information_schema.tables
                WHERE table_schema = %s
                AND table_name = %s);""",
            [self.database_name(), self.table_name()])

        existing_table = cur.fetchone()[0]

        if existing_table:
            cur.execute("DROP TABLE {}.{}".format(self.database_name(), self.table_name()))

        create_table_sql = """
            CREATE TABLE {}.{} (
                id                     BIGINT PRIMARY KEY,
                our_char               CHAR,
                our_enum               ENUM('one', 'two', 'three'),
                our_longtext           LONGTEXT,
                our_mediumtext         MEDIUMTEXT,
                our_text               TEXT,
                our_varchar            VARCHAR(255),
                our_unsigned_tinyint   TINYINT UNSIGNED,
                our_signed_tinyint     TINYINT,
                our_unsigned_smallint  SMALLINT UNSIGNED,
                our_signed_smallint    SMALLINT,
                our_unsigned_mediumint MEDIUMINT UNSIGNED,
                our_signed_mediumint   MEDIUMINT,
                our_unsigned_int       INT UNSIGNED,
                our_signed_int         INT,
                our_unsigned_bigint    BIGINT UNSIGNED,
                our_signed_bigint      BIGINT,
                our_unsigned_decimal_1 DECIMAL(11,2) UNSIGNED,
                our_signed_decimal_1   DECIMAL(11,2),
                our_unsigned_decimal_2 DECIMAL UNSIGNED,
                our_signed_decimal_2   DECIMAL,
                our_unsigned_float     FLOAT UNSIGNED,
                our_signed_float       FLOAT,
                our_unsigned_double    DOUBLE UNSIGNED,
                our_signed_double      DOUBLE,
                our_bit_1              BIT(1),
                our_datetime           DATETIME,
                our_timestamp          TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                our_date               DATE,
                our_time               TIME,
                our_boolean            BOOLEAN
            ) ENGINE = {}
        """.format(self.database_name(), self.table_name(), engine)

        cur.execute(create_table_sql)

        # Ensure expected engine in use
        cur.execute(
            """
            SELECT TABLE_NAME, ENGINE
            FROM information_schema.tables
            WHERE table_schema = %s;""",
            [self.database_name()])

        engine_in_use = cur._result.rows[0][1]
        self.assertEqual(engine, engine_in_use.upper(),
                         msg="Unexpected engine in use: {}".format(engine_in_use))

        for record in [rec_1, rec_2]:
            self.insert_record(cur, record)

        print("\n\nMySQL DB Instantiated." + \
              "\nNAME: {}\nENGINE: {}".format(self.database_name(), engine_in_use) + \
              "\nTABLE: {}\nEVENTS: 2 records inserted\n\n".format(self.table_name()))
def create_user_table() -> None:
    conn = get_db_connection()
    conn.execute('CREATE TABLE user (user_name TEXT PRIMARY KEY, user_alias TEXT)')
    conn.commit()
    conn.close()
def create_meetings_table() -> None:
    conn = get_db_connection()
    conn.execute('CREATE TABLE meeting (meeting_id TEXT, meeting_pw TEXT, '
                 'user_name TEXT, meeting_name TEXT, meeting_company TEXT, '
                 'FOREIGN KEY (user_name) REFERENCES user(user_name))')
    conn.commit()
    conn.close()
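# The get_db_connection helper used by the two table-creation functions above is
# not shown in this section. The sketch below is only an assumption of what a
# SQLite-backed version could look like (the 'app.db' filename is a placeholder);
# the real helper may use a different driver or path.
import sqlite3

def get_db_connection_sketch() -> sqlite3.Connection:
    conn = sqlite3.connect('app.db')
    # enforce the FOREIGN KEY declared on meeting.user_name
    conn.execute('PRAGMA foreign_keys = ON')
    return conn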