def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None):
    """Create streaming replication slot."""
    # Default to the replication mode the connection was opened with.
    if slot_type is None:
        slot_type = self.connection.replication_type

    # Assemble the command word-by-word and join at the end.
    parts = ["CREATE_REPLICATION_SLOT", quote_ident(slot_name, self)]

    if slot_type == REPLICATION_LOGICAL:
        # Logical slots require an output plugin to format the stream.
        if output_plugin is None:
            raise psycopg2.ProgrammingError(
                "output plugin name is required to create "
                "logical replication slot")
        parts.append("LOGICAL")
        parts.append(quote_ident(output_plugin, self))
    elif slot_type == REPLICATION_PHYSICAL:
        # Physical slots must NOT name a plugin.
        if output_plugin is not None:
            raise psycopg2.ProgrammingError(
                "cannot specify output plugin name when creating "
                "physical replication slot")
        parts.append("PHYSICAL")
    else:
        raise psycopg2.ProgrammingError(
            "unrecognized replication type: %s" % repr(slot_type))

    self.execute(" ".join(parts))
def test_unicode_ident(self):
    """quote_ident must quote a non-ASCII identifier correctly on py2 and py3."""
    from psycopg2.extensions import quote_ident

    snowman = u"\u2603"
    expected = u'"' + snowman + u'"'
    result = quote_ident(snowman, self.conn)
    if sys.version_info[0] >= 3:
        self.assertEqual(result, expected)
    else:
        # On Python 2 quote_ident returns a utf-8 encoded byte string.
        self.assertEqual(result, expected.encode('utf8'))
def test_manual_wrap_extension_quote_ident_standalone():
    """quote_ident must remain callable after ddtrace monkey-patching."""
    from ddtrace import patch_all
    patch_all()

    from psycopg2.extensions import quote_ident

    # NOTE: this will crash if it doesn't work.
    # TypeError: argument 2 must be a connection or a cursor
    connection = psycopg2.connect(**POSTGRES_CONFIG)
    quote_ident('foo', connection)
def start_replication(self, slot_name=None, slot_type=None, start_lsn=0,
                      timeline=0, options=None, decode=False):
    """Start replication stream.

    :param slot_name: name of the replication slot (required for logical
        replication, optional for physical).
    :param slot_type: REPLICATION_LOGICAL or REPLICATION_PHYSICAL; defaults
        to the replication type of the connection.
    :param start_lsn: LSN to start streaming from — either an int or an
        "XXX/XXX" hex string.
    :param timeline: physical-replication timeline to follow (0 = current);
        not allowed for logical replication.
    :param options: dict of output plugin options (logical replication only).
    :param decode: passed through to start_replication_expert.
    """
    command = "START_REPLICATION "

    if slot_type is None:
        slot_type = self.connection.replication_type

    if slot_type == REPLICATION_LOGICAL:
        if slot_name:
            command += "SLOT %s " % quote_ident(slot_name, self)
        else:
            raise psycopg2.ProgrammingError(
                "slot name is required for logical replication")
        command += "LOGICAL "
    elif slot_type == REPLICATION_PHYSICAL:
        if slot_name:
            command += "SLOT %s " % quote_ident(slot_name, self)
        # don't add "PHYSICAL", before 9.4 it was just START_REPLICATION XXX/XXX
    else:
        raise psycopg2.ProgrammingError(
            "unrecognized replication type: %s" % repr(slot_type))

    # Normalize the starting LSN into the server's "XXX/XXX" hex format.
    if isinstance(start_lsn, str):  # was `type(start_lsn) is str`; isinstance is the idiom
        lsn = start_lsn.split('/')
        lsn = "%X/%08X" % (int(lsn[0], 16), int(lsn[1], 16))
    else:
        lsn = "%X/%08X" % ((start_lsn >> 32) & 0xFFFFFFFF,
                           start_lsn & 0xFFFFFFFF)
    command += lsn

    if timeline != 0:
        if slot_type == REPLICATION_LOGICAL:
            raise psycopg2.ProgrammingError(
                "cannot specify timeline for logical replication")
        command += " TIMELINE %d" % timeline

    if options:
        if slot_type == REPLICATION_PHYSICAL:
            raise psycopg2.ProgrammingError(
                "cannot specify output plugin options for physical replication")
        command += " ("
        # BUG FIX: the original used options.iteritems(), which does not
        # exist on Python 3 dicts and raises AttributeError there;
        # .items() behaves identically on both Python 2 and 3.
        for k, v in options.items():
            if not command.endswith('('):
                command += ", "
            command += "%s %s" % (quote_ident(k, self), _A(str(v)))
        command += ")"

    self.start_replication_expert(command, decode=decode)
def insert_record(cursor, table_name, data):
    """Insert one row into `table_name` from a column-name -> value mapping."""
    # Deterministic column order so the SQL and the value list line up.
    our_keys = sorted(data.keys())
    our_values = [data.get(key) for key in our_keys]

    columns_sql = ", \n ".join(our_keys)

    # Array-typed json columns need an explicit cast on their placeholder.
    value_sql_array = []
    for key in our_keys:
        if key == 'our_json_array':
            value_sql_array.append("%s::json[]")
        elif key == 'our_jsonb_array':
            value_sql_array.append("%s::jsonb[]")
        else:
            value_sql_array.append("%s")
    value_sql = ",".join(value_sql_array)

    insert_sql = """ INSERT INTO {} ( {} ) VALUES ( {} )""".format(
        quote_ident(table_name, cursor), columns_sql, value_sql)
    cursor.execute(insert_sql, our_values)
def test_run(self):
    """End-to-end FULL_TABLE replication test.

    Discovers the test table, then runs three sync jobs: (1) initial sync,
    (2) re-sync with no db changes, (3) re-sync after inserts, updates and
    deletes. After each sync the emitted records, message actions, schema,
    table version and bookmarked state are verified.
    """
    conn_id = connections.ensure_connection(self)

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    # verify discovery produced (at least) 1 expected catalog
    found_catalogs = [
        found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
        if found_catalog['tap_stream_id'] in self.expected_check_streams()
    ]
    self.assertGreaterEqual(len(found_catalogs), 1)

    # verify the tap discovered the expected streams
    found_catalog_names = {
        catalog['tap_stream_id']
        for catalog in found_catalogs
    }
    self.assertSetEqual(self.expected_check_streams(), found_catalog_names)

    # verify that persisted streams have the correct properties
    test_catalog = found_catalogs[0]
    self.assertEqual(test_table_name, test_catalog['stream_name'])
    print("discovered streams are correct")

    # perform table selection
    print('selecting {} and all fields within the table'.format(
        test_table_name))
    schema_and_metadata = menagerie.get_annotated_schema(
        conn_id, test_catalog['stream_id'])
    additional_md = [{
        "breadcrumb": [],
        "metadata": {
            'replication-method': 'FULL_TABLE'
        }
    }]
    _ = connections.select_catalog_and_fields_via_metadata(
        conn_id, test_catalog, schema_and_metadata, additional_md)

    # clear state
    menagerie.set_state(conn_id, {})

    # run sync job 1 and verify exit codes
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # get records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    records_by_stream = runner.get_records_from_target_output()
    table_version_1 = records_by_stream[test_table_name]['table_version']
    messages = records_by_stream[test_table_name]['messages']

    # verify the expected number of records were replicated
    self.assertEqual(3, record_count_by_stream[test_table_name])

    # verify the message actions match expectations
    # (initial sync: activate_version bracketing the 3 upserts)
    self.assertEqual(5, len(messages))
    self.assertEqual('activate_version', messages[0]['action'])
    self.assertEqual('upsert', messages[1]['action'])
    self.assertEqual('upsert', messages[2]['action'])
    self.assertEqual('upsert', messages[3]['action'])
    self.assertEqual('activate_version', messages[4]['action'])

    # verify the persisted schema matches expectations
    self.assertEqual(expected_schemas[test_table_name],
                     records_by_stream[test_table_name]['schema'])

    # verify replicated records match expectations
    self.assertDictEqual(self.expected_records[0], messages[1]['data'])
    self.assertDictEqual(self.expected_records[1], messages[2]['data'])
    self.assertDictEqual(self.expected_records[2], messages[3]['data'])
    print("records are correct")

    # grab bookmarked state
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_full_table_replication_test']

    # verify state and bookmarks meet expectations
    # (FULL_TABLE keeps no lsn/replication-key bookmarks, only a version)
    self.assertIsNone(state['currently_syncing'])
    self.assertIsNone(bookmark.get('lsn'))
    self.assertIsNone(bookmark.get('replication_key'))
    self.assertIsNone(bookmark.get('replication_key_value'))
    self.assertEqual(table_version_1, bookmark['version'])

    #----------------------------------------------------------------------
    # invoke the sync job AGAIN and get the same 3 records
    #----------------------------------------------------------------------

    # run sync job 2 and verify exit codes
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # get records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    records_by_stream = runner.get_records_from_target_output()
    table_version_2 = records_by_stream[test_table_name]['table_version']
    messages = records_by_stream[test_table_name]['messages']

    # verify the expected number of records were replicated
    self.assertEqual(3, record_count_by_stream[test_table_name])

    # verify the message actions match expectations
    # (re-sync: no leading activate_version this time)
    self.assertEqual(4, len(messages))
    self.assertEqual('upsert', messages[0]['action'])
    self.assertEqual('upsert', messages[1]['action'])
    self.assertEqual('upsert', messages[2]['action'])
    self.assertEqual('activate_version', messages[3]['action'])

    # verify the new table version increased on the second sync
    self.assertGreater(table_version_2, table_version_1)

    # verify the persisted schema still matches expectations
    self.assertEqual(expected_schemas[test_table_name],
                     records_by_stream[test_table_name]['schema'])

    # verify replicated records still match expectations
    self.assertDictEqual(self.expected_records[0], messages[0]['data'])
    self.assertDictEqual(self.expected_records[1], messages[1]['data'])
    self.assertDictEqual(self.expected_records[2], messages[2]['data'])

    # grab bookmarked state
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_full_table_replication_test']

    # verify state and bookmarks meet expectations
    self.assertIsNone(state['currently_syncing'])
    self.assertIsNone(bookmark.get('lsn'))
    self.assertIsNone(bookmark.get('replication_key'))
    self.assertIsNone(bookmark.get('replication_key_value'))
    self.assertEqual(table_version_2, bookmark['version'])

    #----------------------------------------------------------------------
    # invoke the sync job AGAIN following various manipulations to the data
    #----------------------------------------------------------------------

    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:

            # NB | We will perform the following actions prior to the next sync:
            #      [Action (EXPECTED RESULT)]
            #      Insert a record
            #      Insert a record to be updated prior to sync
            #      Insert a record to be deleted prior to sync (NOT REPLICATED)
            #      Update an existing record
            #      Update a newly inserted record
            #      Delete an existing record
            #      Delete a newly inserted record

            # inserting...
            # a new record (id 4)
            nyc_tz = pytz.timezone('America/New_York')
            our_time_offset = "-04:00"
            our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(6, 6, 6)
            our_time_tz = our_time.isoformat() + our_time_offset
            our_date = datetime.date(1970, 7, 1)
            my_uuid = str(uuid.uuid1())

            self.inserted_records.append({
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '1',
                'our_json': json.dumps({'nymn': 77}),
                'our_jsonb': json.dumps({'burgers': 'good++'}),
                'our_uuid': my_uuid,
                'our_citext': 'cyclops 2',
                'our_store': 'dances=>"floor",name=>"betty"',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': '$0.98789'
            })
            self.expected_records.append({
                'id': 4,
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'OUR DATE': '1970-07-01T00:00:00+00:00',
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': True,
                'our_json': '{"nymn": 77}',
                'our_jsonb': '{"burgers": "good++"}',
                'our_uuid': self.inserted_records[-1]['our_uuid'],
                'our_citext': self.inserted_records[-1]['our_citext'],
                'our_store': {
                    "name": "betty",
                    "dances": "floor"
                },
                'our_cidr': self.inserted_records[-1]['our_cidr'],
                'our_inet': self.inserted_records[-1]['our_inet'],
                'our_mac': self.inserted_records[-1]['our_mac'],
                'our_money': '$0.99',
                'our_alignment_enum': None,
            })

            # a new record which we will then update prior to sync (id 5)
            our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1999, 9, 9)
            my_uuid = str(uuid.uuid1())

            self.inserted_records.append({
                'our_varchar': "our_varchar 4",
                'our_varchar_10': "varchar_3",
                'our_text': "some text 4",
                'our_integer': 55200,
                'our_smallint': 1,
                'our_bigint': 100000,
                'our_decimal': decimal.Decimal('1234567899.99'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps('some string'),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('1234567899.99'),
                'our_text': 'some text 4',
                'our_bit': False,
                'our_integer': 55200,
                'our_double': decimal.Decimal('1.1'),
                'id': 5,
                'our_json': self.inserted_records[-1]['our_json'],
                'our_boolean': True,
                'our_jsonb': self.inserted_records[-1]['our_jsonb'],
                'our_bigint': 100000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1999-09-09T00:00:00+00:00',
                'our_varchar': 'our_varchar 4',
                'our_uuid': self.inserted_records[-1]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_3',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
                'our_alignment_enum': None,
            })

            # a new record to be deleted prior to sync (id 6)
            our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1999, 9, 9)
            my_uuid = str(uuid.uuid1())

            self.inserted_records.append({
                'our_varchar': "our_varchar 4",
                'our_varchar_10': "varchar_3",
                'our_text': "some text 4",
                'our_integer': 55200,
                'our_smallint': 1,
                'our_bigint': 100000,
                'our_decimal': decimal.Decimal('1234567899.99'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps('some string'),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('1234567899.99'),
                'our_text': 'some text 4',
                'our_bit': False,
                'our_integer': 55200,
                'our_double': decimal.Decimal('1.1'),
                'id': 6,
                'our_json': self.inserted_records[-1]['our_json'],
                'our_boolean': True,
                'our_jsonb': self.inserted_records[-1]['our_jsonb'],
                'our_bigint': 100000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1999-09-09T00:00:00+00:00',
                'our_varchar': 'our_varchar 4',
                'our_uuid': self.inserted_records[-1]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_3',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
                'our_alignment_enum': None,
            })

            db_utils.insert_record(cur, test_table_name,
                                   self.inserted_records[3])
            db_utils.insert_record(cur, test_table_name,
                                   self.inserted_records[4])
            db_utils.insert_record(cur, test_table_name,
                                   self.inserted_records[5])

            # updating ...
            # an existing record
            canon_table_name = db_utils.canonicalized_table_name(
                cur, test_schema_name, test_table_name)
            record_pk = 1
            our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            updated_data = {
                "OUR TS TZ": our_ts_tz,
                "our_double": decimal.Decimal("6.6"),
                "our_money": "$0.00"
            }
            self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(
                our_ts_tz)
            self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
            self.expected_records[0]["our_money"] = "$0.00"
            db_utils.update_record(cur, canon_table_name, record_pk,
                                   updated_data)

            # a newly inserted record
            canon_table_name = db_utils.canonicalized_table_name(
                cur, test_schema_name, test_table_name)
            record_pk = 5
            our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            updated_data = {
                "OUR TS TZ": our_ts_tz,
                "our_double": decimal.Decimal("6.6"),
                "our_money": "$0.00"
            }
            self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(
                our_ts_tz)
            self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
            self.expected_records[4]["our_money"] = "$0.00"
            db_utils.update_record(cur, canon_table_name, record_pk,
                                   updated_data)

            # deleting
            # an existing record
            record_pk = 2
            db_utils.delete_record(cur, canon_table_name, record_pk)

            # a newly inserted record
            record_pk = 6
            db_utils.delete_record(cur, canon_table_name, record_pk)

    #----------------------------------------------------------------------
    # invoke the sync job AGAIN after various manipulations
    #----------------------------------------------------------------------

    # run sync job 3 and verify exit codes
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # get records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    records_by_stream = runner.get_records_from_target_output()
    table_version_3 = records_by_stream[test_table_name]['table_version']
    messages = records_by_stream[test_table_name]['messages']

    # verify the expected number of records were replicated
    self.assertEqual(4, record_count_by_stream[test_table_name])

    # verify the message actions match expectations
    self.assertEqual(5, len(messages))
    self.assertEqual('upsert', messages[0]['action'])
    self.assertEqual('upsert', messages[1]['action'])
    self.assertEqual('upsert', messages[2]['action'])
    self.assertEqual('upsert', messages[3]['action'])
    self.assertEqual('activate_version', messages[4]['action'])

    # verify the new table version increased on the second sync
    self.assertGreater(table_version_3, table_version_2)

    # verify the persisted schema still matches expectations
    self.assertEqual(expected_schemas[test_table_name],
                     records_by_stream[test_table_name]['schema'])

    # NB | This is a little tough to track mentally so here's a breakdown of
    #      the order of operations by expected records indexes:
    #      Prior to Sync 1
    #        insert 0, 1, 2
    #      Prior to Sync 2
    #        No db changes
    #      Prior to Sync 3
    #        insert 3, 4, 5
    #        update 0, 4
    #        delete 1, 5
    #      Resulting Synced Records: 2, 3, 0, 4

    # verify replicated records still match expectations
    self.assertDictEqual(self.expected_records[2],
                         messages[0]['data'])  # existing insert
    self.assertDictEqual(self.expected_records[3],
                         messages[1]['data'])  # new insert
    self.assertDictEqual(self.expected_records[0],
                         messages[2]['data'])  # existing update
    self.assertDictEqual(self.expected_records[4],
                         messages[3]['data'])  # new insert / update

    # grab bookmarked state
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_full_table_replication_test']

    # verify state and bookmarks meet expectations
    self.assertIsNone(state['currently_syncing'])
    self.assertIsNone(bookmark.get('lsn'))
    self.assertIsNone(bookmark.get('replication_key'))
    self.assertIsNone(bookmark.get('replication_key_value'))
    self.assertEqual(table_version_3, bookmark['version'])
def as_string(self, context):
    """Return the wrapped identifier quoted for the given connection/cursor."""
    quoted = ext.quote_ident(self._wrapped, context)
    return quoted
def canonicalized_table_name(schema, table, cur):
    """Return a fully-qualified, safely-quoted "schema"."table" name."""
    quoted_schema = quote_ident(schema, cur)
    quoted_table = quote_ident(table, cur)
    return "%s.%s" % (quoted_schema, quoted_table)
def drop_replication_slot(self, slot_name):
    """Drop streaming replication slot."""
    # Slot name is quoted as an identifier, not passed as a query parameter.
    self.execute("DROP_REPLICATION_SLOT %s" % quote_ident(slot_name, self))
def test_run(self): conn_id = connections.ensure_connection(self) # run in check mode check_job_name = runner.run_check_mode(self, conn_id) # verify check exit codes exit_status = menagerie.get_exit_status(conn_id, check_job_name) menagerie.verify_check_exit_status(self, exit_status, check_job_name) # verify the tap discovered the right streams found_catalogs = [ fc for fc in menagerie.get_catalogs(conn_id) if fc['tap_stream_id'] in self.expected_check_streams() ] self.assertGreaterEqual( len(found_catalogs), 1, msg="unable to locate schemas for connection {}".format(conn_id)) found_catalog_names = set( map(lambda c: c['tap_stream_id'], found_catalogs)) diff = self.expected_check_streams().symmetric_difference( found_catalog_names) self.assertEqual( len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) # verify that persisted streams have the correct properties test_catalog = found_catalogs[0] self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) print("discovered streams are correct") additional_md = [{ "breadcrumb": [], "metadata": { 'replication-method': 'LOG_BASED' } }] #don't selcted our_text_2 _ = connections.select_catalog_and_fields_via_metadata( conn_id, test_catalog, menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), additional_md, ['our_text_2']) # clear state menagerie.set_state(conn_id, {}) sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 4}) records_by_stream = runner.get_records_from_target_output() table_version = records_by_stream['postgres_logical_replication_test'][ 'table_version'] self.assertEqual( 
records_by_stream['postgres_logical_replication_test']['messages'] [0]['action'], 'activate_version') self.assertEqual( records_by_stream['postgres_logical_replication_test']['messages'] [1]['action'], 'upsert') self.assertEqual( records_by_stream['postgres_logical_replication_test']['messages'] [2]['action'], 'upsert') self.assertEqual( records_by_stream['postgres_logical_replication_test']['messages'] [3]['action'], 'upsert') self.assertEqual( records_by_stream['postgres_logical_replication_test']['messages'] [4]['action'], 'upsert') self.assertEqual( records_by_stream['postgres_logical_replication_test']['messages'] [5]['action'], 'activate_version') # verify state and bookmarks state = menagerie.get_state(conn_id) bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone(bookmark['lsn'], msg="expected bookmark for stream to have an lsn") lsn_1 = bookmark['lsn'] self.assertEqual(bookmark['version'], table_version, msg="expected bookmark for stream to match version") #---------------------------------------------------------------------- # invoke the sync job again after adding a record #---------------------------------------------------------------------- print("inserting a record 5") with db_utils.get_test_connection(test_db) as conn: conn.autocommit = True with conn.cursor() as cur: #insert fixture data 3 our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) nyc_tz = pytz.timezone('America/New_York') our_ts_tz = nyc_tz.localize(our_ts) our_time = datetime.time(3, 4, 5) our_time_tz = our_time.isoformat() + "-04:00" our_date = datetime.date(1933, 3, 3) my_uuid = str(uuid.uuid1()) #STRINGS: #OUR TS: '1993-03-03 03:03:03.333333' #OUR TS TZ: '1993-03-03 08:03:03.333333+00' #'OUR TIME': '03:04:05' #'OUR TIME TZ': '03:04:05+00' self.rec_5 = { 'our_varchar': "our_varchar 5", # str 'our_varchar_10': "varchar13", # str 
'our_text': "some text 3", #str 'our_text_2': "NOT SELECTED", 'our_integer': 96000, #int 'our_smallint': 3, # int 'our_bigint': 3000000, #int 'our_decimal': decimal.Decimal( '1234567890.03' ), #1234567890.03 / our_decimal is a <class 'float'> quote_ident('OUR TS', cur): our_ts, # str '1993-03-03 03:03:03.333333' quote_ident('OUR TS TZ', cur): our_ts_tz, #str '1993-03-03 08:03:03.333333+00' quote_ident('OUR TIME', cur): our_time, # str '03:04:05' quote_ident('OUR TIME TZ', cur): our_time_tz, # str '03:04:05+00' quote_ident('OUR DATE', cur): our_date, #1933-03-03 / OUR DATE is a <class 'str'> 'our_double': 3.3, #3.3 / our_double is a <class 'float'> 'our_real': 6.6, #6.6 / our_real is a <class 'float'> 'our_boolean': True, #boolean 'our_bit': '1', #string 'our_json': json.dumps({'secret': 33}), #string 'our_jsonb': json.dumps(['burgers make me hungry']), 'our_uuid': my_uuid, #string 'our_store': 'jumps=>"high",name=>"betty"', #string 'our_citext': 'maGICKal 3', 'our_cidr': '192.168.102.128/32', 'our_inet': '192.168.102.128/32', 'our_mac': '08:00:2b:01:02:05', 'our_money': '$412.1234' } insert_record(cur, test_table_name, self.rec_5) sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) records_by_stream = runner.get_records_from_target_output() self.assertTrue(len(records_by_stream) > 0) for stream, recs in records_by_stream.items(): # verify the persisted schema was correct self.assertDictEqual(recs['schema'], expected_schemas[stream]) self.assertEqual( 1, len(records_by_stream['postgres_logical_replication_test'] ['messages'])) actual_record_2 = records_by_stream[ 
'postgres_logical_replication_test']['messages'][0]['data'] actual_sdc_lsn_2 = int(actual_record_2['_sdc_lsn']) del actual_record_2['_sdc_lsn'] expected_inserted_record = { 'our_text': 'some text 3', 'our_real': decimal.Decimal('6.6'), '_sdc_deleted_at': None, 'our_store': { 'name': 'betty', 'jumps': 'high' }, 'our_bigint': 3000000, 'our_varchar': 'our_varchar 5', 'our_double': decimal.Decimal('3.3'), 'our_bit': True, 'our_uuid': self.rec_5['our_uuid'], 'OUR TS': '1993-03-03T03:03:03.333333+00:00', 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', 'OUR TIME': '03:04:05', 'OUR TIME TZ': '03:04:05-04:00', 'OUR DATE': '1933-03-03T00:00:00+00:00', 'our_decimal': decimal.Decimal('1234567890.03'), 'id': 5, 'our_varchar_10': 'varchar13', 'our_json': '{"secret": 33}', 'our_jsonb': self.rec_5['our_jsonb'], 'our_smallint': 3, 'our_integer': 96000, 'our_boolean': True, 'our_citext': 'maGICKal 3', 'our_cidr': self.rec_5['our_cidr'], 'our_inet': '192.168.102.128', 'our_mac': self.rec_5['our_mac'], 'our_alignment_enum': None, 'our_money': '$412.12' } self.assertDictEqual(expected_inserted_record, actual_record_2) self.assertEqual( records_by_stream['postgres_logical_replication_test']['messages'] [0]['action'], 'upsert') print("inserted record is correct") state = menagerie.get_state(conn_id) chicken_bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone( chicken_bookmark['lsn'], msg= "expected bookmark for stream public-postgres_logical_replication_test to have an scn" ) lsn_2 = chicken_bookmark['lsn'] self.assertTrue(lsn_2 >= lsn_1) #table_version does NOT change self.assertEqual( chicken_bookmark['version'], table_version, msg= "expected bookmark for stream public-postgres_logical_replication_test to match version" ) #---------------------------------------------------------------------- # invoke the sync job again after deleting 
a record #---------------------------------------------------------------------- print("delete row from source db") with db_utils.get_test_connection(test_db) as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute("DELETE FROM {} WHERE id = 3".format( canonicalized_table_name(test_schema_name, test_table_name, cur))) sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) # verify the inserted record's lsn is less than or equal to the bookmarked lsn self.assertGreaterEqual(lsn_2, actual_sdc_lsn_2) expected_record_count = 1 if actual_sdc_lsn_2 < lsn_2 else 2 self.assertEqual( record_count_by_stream, {'postgres_logical_replication_test': expected_record_count}) records_by_stream = runner.get_records_from_target_output() for stream, recs in records_by_stream.items(): # verify the persisted schema was correct self.assertEqual( recs['schema'], expected_schemas[stream], msg= "Persisted schema did not match expected schema for stream `{}`." .format(stream)) # if there are 2 records... 
if expected_record_count == 2: # the 1st message will be the previous insert insert_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][0]['data'] del insert_message['_sdc_lsn'] self.assertDictEqual(insert_message, expected_inserted_record) #the 2nd message will be the delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][ expected_record_count - 1] self.assertEqual(delete_message['action'], 'upsert') sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 3) print("deleted record is correct") state = menagerie.get_state(conn_id) bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone( bookmark['lsn'], msg="expected bookmark for stream ROOT-CHICKEN to have an scn") lsn_3 = bookmark['lsn'] self.assertTrue(lsn_3 >= lsn_2) #---------------------------------------------------------------------- # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format #---------------------------------------------------------------------- print("delete row from source db") with db_utils.get_test_connection(test_db) as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute( "DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)" .format( canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) 
self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 2}) records_by_stream = runner.get_records_from_target_output() for stream, recs in records_by_stream.items(): # verify the persisted schema was correct self.assertEqual( recs['schema'], expected_schemas[stream], msg= "Persisted schema did not match expected schema for stream `{}`." .format(stream)) #first record will be the previous delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][0] sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 3) #the 2nd message will be the more recent delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][1] self.assertEqual(delete_message['action'], 'upsert') sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 2) print("deleted record is correct") state = menagerie.get_state(conn_id) bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone( bookmark['lsn'], msg="expected bookmark for stream ROOT-CHICKEN to have an scn") lsn_4 = bookmark['lsn'] self.assertTrue(lsn_4 >= lsn_3) #table_version does NOT change self.assertEqual( bookmark['version'], table_version, msg= "expected bookmark for stream postgres_logical_replication_test to match version" ) #---------------------------------------------------------------------- # invoke the sync job again after deleting a record using the 'id IN (<id>, <id>)' format #---------------------------------------------------------------------- print("delete row from source db") with db_utils.get_test_connection(test_db) as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute("DELETE 
FROM {} WHERE id IN (4, 5)".format( canonicalized_table_name(test_schema_name, test_table_name, cur))) sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 3}) records_by_stream = runner.get_records_from_target_output() for stream, recs in records_by_stream.items(): # verify the persisted schema was correct self.assertEqual( recs['schema'], expected_schemas[stream], msg= "Persisted schema did not match expected schema for stream `{}`." .format(stream)) #first record will be the previous delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][0] sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 2) #the 2nd message will be the more recent delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][1] self.assertEqual(delete_message['action'], 'upsert') sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 4) print("deleted record is correct") #the 3rd message will be the more recent delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][2] self.assertEqual(delete_message['action'], 'upsert') sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 5) print("deleted record is correct") state = menagerie.get_state(conn_id) bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] 
self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone( bookmark['lsn'], msg="expected bookmark for stream ROOT-CHICKEN to have an scn") lsn_5 = bookmark['lsn'] self.assertTrue(lsn_5 >= lsn_4) #table_version does NOT change self.assertEqual( bookmark['version'], table_version, msg= "expected bookmark for stream postgres_logical_replication_test to match version" ) #---------------------------------------------------------------------- # invoke the sync job again after updating a record #---------------------------------------------------------------------- print("updating row from source db") with db_utils.get_test_connection(test_db) as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: cur.execute( "UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1" .format( canonicalized_table_name(test_schema_name, test_table_name, cur))) sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 3}) records_by_stream = runner.get_records_from_target_output() for stream, recs in records_by_stream.items(): # verify the persisted schema was correct self.assertEqual( recs['schema'], expected_schemas[stream], msg= "Persisted schema did not match expected schema for stream `{}`." 
.format(stream)) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) self.assertEqual( len(records_by_stream['postgres_logical_replication_test'] ['messages']), 3) #first record will be the previous first delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][0] sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 4) #second record will be the previous second delete delete_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][1] sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') self.assertIsNotNone(sdc_deleted_at) self.assertEqual(delete_message['data']['id'], 5) #third record will be the new update updated_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][2] del updated_message['data']['_sdc_lsn'] self.assertEqual(updated_message['action'], 'upsert') expected_updated_rec = { 'our_varchar': 'THIS HAS BEEN UPDATED', 'id': 1, 'our_varchar_10': "varchar_10", 'our_text': "some text", 'our_integer': 44100, 'our_smallint': 1, 'our_bigint': 1000000, 'our_decimal': None, 'OUR TS': '1997-02-02T02:02:02.722184+00:00', 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00', 'OUR TIME': '12:11:10', 'OUR TIME TZ': '12:11:10-04:00', 'OUR DATE': '1998-03-04T00:00:00+00:00', 'our_double': None, 'our_real': None, 'our_boolean': True, 'our_bit': False, 'our_json': '{"secret": 55}', 'our_jsonb': self.rec_1['our_jsonb'], 'our_uuid': self.rec_1['our_uuid'], '_sdc_deleted_at': None, 'our_store': { 'name': 'betty', 'size': 'small' }, 'our_citext': 'maGICKal', 'our_cidr': self.rec_1['our_cidr'], 'our_inet': self.rec_1['our_inet'], 'our_mac': self.rec_1['our_mac'], 'our_alignment_enum': 'bad', 'our_money': '$56.81' } self.assertDictEqual(expected_updated_rec, updated_message['data']) 
print("updated record is correct") #check state again state = menagerie.get_state(conn_id) self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") chicken_bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone( chicken_bookmark['lsn'], msg= "expected bookmark for stream public-postgres_logical_replication_test to have an scn" ) lsn_6 = chicken_bookmark['lsn'] self.assertTrue(lsn_6 >= lsn_5) #table_version does NOT change self.assertEqual( chicken_bookmark['version'], table_version, msg= "expected bookmark for stream public-postgres_logical_replication_test to match version" ) #---------------------------------------------------------------------- # invoke the sync job one last time. should only get the PREVIOUS update #---------------------------------------------------------------------- sync_job_name = runner.run_sync_mode(self, conn_id) # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) record_count_by_stream = runner.examine_target_output_file( self, conn_id, self.expected_sync_streams(), self.expected_pks()) # we will get the previous update record again self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives update_message = records_by_stream[ 'postgres_logical_replication_test']['messages'][2] self.assertEqual(update_message['action'], 'upsert') self.assertEqual( set(update_message['data'].keys()), set(expected_updated_rec.keys()), msg="keys for expected_record_1 are wrong: {}".format( set(update_message['data'].keys()).symmetric_difference( set(expected_updated_rec.keys())))) for k, v in update_message['data'].items(): 
self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format( v, expected_updated_rec[k], k)) #check state again state = menagerie.get_state(conn_id) chicken_bookmark = state['bookmarks'][ 'logical_1-public-postgres_logical_replication_test'] self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") self.assertIsNotNone( chicken_bookmark['lsn'], msg= "expected bookmark for stream public-postgres_logical_replication_test to have an scn" ) lsn_7 = chicken_bookmark['lsn'] self.assertTrue(lsn_7 >= lsn_6) #table_version does NOT change self.assertEqual( chicken_bookmark['version'], table_version, msg= "expected bookmark for stream public-postgres_logical_replication_test to match version" )
def _drop_db(cr, dbname):
    """Drop database ``dbname`` using cursor ``cr``, safely quoting the identifier."""
    quoted_name = AsIs(quote_ident(dbname, cr))
    cr.execute("DROP DATABASE %s", (quoted_name,))
def _copy_db(cr, source, dest):
    """Create database ``dest`` as a copy of ``source`` (via WITH TEMPLATE)."""
    dest_ident = AsIs(quote_ident(dest, cr))
    source_ident = AsIs(quote_ident(source, cr))
    cr.execute(
        "CREATE DATABASE %s WITH TEMPLATE %s",
        (dest_ident, source_ident),
    )
def setUp(self):
    """Create the array-typed test table and a materialized view mirroring it."""
    # Start from a clean slate; the view name contains spaces, hence the quotes.
    with get_test_connection('postgres') as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute('DROP MATERIALIZED VIEW IF EXISTS "LIKE CHICKEN TIMES"')

    spec = {
        "columns": [
            {"name": 'our_int_array_pk', "type": "integer[]", "primary_key": True},
            {"name": 'our_text_array', "type": "text[]"},
        ],
        "name": TestArraysLikeTable.table_name,
    }
    ensure_test_table(spec)

    # Recreate the materialized view pointing at the (re)built table.
    with get_test_connection('postgres') as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            view_ident = quote_ident(TestArraysLikeTable.like_table_name, cur)
            source_ident = quote_ident(TestArraysLikeTable.table_name, cur)
            cur.execute(
                "CREATE MATERIALIZED VIEW {} AS SELECT * FROM {}\n".format(
                    view_ident, source_ident))
def setUp(self):
    """Build the incremental-replication fixture table in the 'dev' database.

    Drops any leftover table from a previous run, ensures the hstore/citext
    extensions and the ALIGNMENT enum exist, creates the test table with one
    column per interesting Postgres type, and inserts three fixture rows
    (self.rec_1 .. self.rec_3).
    """
    db_utils.ensure_db("dev")
    self.maxDiff = None
    creds = {}  # NOTE(review): never populated or read below — appears unused
    # All connection settings must be supplied via the environment.
    missing_envs = [x for x in [os.getenv('TAP_POSTGRES_HOST'),
                                os.getenv('TAP_POSTGRES_USER'),
                                os.getenv('TAP_POSTGRES_PASSWORD'),
                                os.getenv('TAP_POSTGRES_PORT'),
                                os.getenv('TAP_POSTGRES_DBNAME')] if x == None]
    if len(missing_envs) != 0:
        #pylint: disable=line-too-long
        raise Exception("set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT")

    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # Drop the table if a previous run left it behind.
            old_table = cur.execute("""SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s);""",
                                    [test_schema_name, test_table_name])
            old_table = cur.fetchone()[0]
            if old_table:
                cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur)))

        cur = conn.cursor()
        # hstore must be installed once per database before first use.
        cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """)
        if cur.fetchone()[0] is None:
            cur.execute(""" CREATE EXTENSION hstore; """)

        cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""")
        cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """)
        cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """)

        # One column per Postgres type the tap must handle; quoted mixed-case
        # names ("OUR TS", etc.) exercise identifier quoting downstream.
        create_table_sql = """ CREATE TABLE {} (id SERIAL PRIMARY KEY, our_varchar VARCHAR, our_varchar_10 VARCHAR(10), our_text TEXT, our_integer INTEGER, our_smallint SMALLINT, our_bigint BIGINT, our_decimal NUMERIC({},{}), "OUR TS" TIMESTAMP WITHOUT TIME ZONE, "OUR TS TZ" TIMESTAMP WITH TIME ZONE, "OUR TIME" TIME WITHOUT TIME ZONE, "OUR TIME TZ" TIME WITH TIME ZONE, "OUR DATE" DATE, our_double DOUBLE PRECISION, our_real REAL, our_boolean BOOLEAN, our_bit BIT(1), our_json JSON, our_jsonb JSONB, our_uuid UUID, our_store HSTORE, our_citext CITEXT, our_inet inet, our_cidr cidr, our_mac macaddr, our_alignment_enum ALIGNMENT, our_money money) """.format(canonicalized_table_name(test_schema_name, test_table_name, cur), NUMERIC_PRECISION, NUMERIC_SCALE)

        cur.execute(create_table_sql)

        #insert fixture data 1
        our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
        nyc_tz = pytz.timezone('America/New_York')
        our_ts_tz = nyc_tz.localize(our_ts)
        our_time = datetime.time(12, 11, 10)
        our_time_tz = our_time.isoformat() + "-04:00"
        our_date = datetime.date(1998, 3, 4)
        my_uuid = str(uuid.uuid1())

        self.rec_1 = {'our_varchar': "our_varchar",
                      'our_varchar_10': "varchar_10",
                      'our_text': "some text",
                      'our_integer': 44100,
                      'our_smallint': 1,
                      'our_bigint': 1000000,
                      'our_decimal': decimal.Decimal('.01'),
                      quote_ident('OUR TS', cur): our_ts,
                      quote_ident('OUR TS TZ', cur): our_ts_tz,
                      quote_ident('OUR TIME', cur): our_time,
                      quote_ident('OUR TIME TZ', cur): our_time_tz,
                      quote_ident('OUR DATE', cur): our_date,
                      'our_double': decimal.Decimal('1.1'),
                      'our_real': 1.2,
                      'our_boolean': True,
                      'our_bit': '0',
                      'our_json': json.dumps({'secret': 55}),
                      'our_jsonb': json.dumps({'burgers': 'good'}),
                      'our_uuid': my_uuid,
                      'our_store': 'size=>"small",name=>"betty"',
                      'our_citext': 'maGICKal 4',
                      'our_cidr': '192.168.100.128/25',
                      'our_inet': '192.168.100.128/24',
                      'our_mac': '08:00:2b:01:02:03',
                      'our_alignment_enum': 'good',
                      'our_money': '100.1122',
                      }
        insert_record(cur, test_table_name, self.rec_1)

        #insert fixture data 2
        our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
        nyc_tz = pytz.timezone('America/New_York')
        our_ts_tz = nyc_tz.localize(our_ts)
        our_time = datetime.time(10, 9, 8)
        our_time_tz = our_time.isoformat() + "-04:00"
        our_date = datetime.date(1964, 7, 1)
        my_uuid = str(uuid.uuid1())

        self.rec_2 = {'our_varchar': "our_varchar 2",
                      'our_varchar_10': "varchar_10",
                      'our_text': "some text 2",
                      'our_integer': 44101,
                      'our_smallint': 2,
                      'our_bigint': 1000001,
                      'our_decimal': decimal.Decimal('.02'),
                      quote_ident('OUR TS', cur): our_ts,
                      quote_ident('OUR TS TZ', cur): our_ts_tz,
                      quote_ident('OUR TIME', cur): our_time,
                      quote_ident('OUR TIME TZ', cur): our_time_tz,
                      quote_ident('OUR DATE', cur): our_date,
                      'our_double': decimal.Decimal('1.1'),
                      'our_real': decimal.Decimal('1.2'),
                      'our_boolean': True,
                      'our_bit': '1',
                      'our_json': json.dumps(["nymn 77"]),
                      'our_jsonb': json.dumps({'burgers': 'good++'}),
                      'our_uuid': my_uuid,
                      'our_store': 'dances=>"floor",name=>"betty"',
                      'our_citext': 'maGICKal 2',
                      'our_cidr': '192.168.101.128/25',
                      'our_inet': '192.168.101.128/24',
                      'our_mac': '08:00:2b:01:02:04',
                      'our_money': None
                      }
        insert_record(cur, test_table_name, self.rec_2)

        # Third row exercises the non-finite numeric edge cases.
        self.rec_3 = {'our_decimal': decimal.Decimal('NaN'),
                      'our_double': float('nan'),
                      'our_real': float('-inf')
                      }
        insert_record(cur, test_table_name, self.rec_3)
% (schemaname, indexname, free_percent, args.minimal_compact_percent)) continue free_space_total_plan += free_space log.info( 'Bloat stats: "%s.%s" - free_percent %d%%, free_space %s' % (schemaname, indexname, free_percent, size_pretty(curs, free_space))) (reindex_indexname, reindex_query) = get_reindex_query( indexname, indexdef, tablespace, conname) (alter_query, drop_query) = get_alter_drop_index_query( quote_ident(schemaname, curs), quote_ident(tablename, curs), quote_ident(indexname, curs), quote_ident(reindex_indexname, curs), quote_ident(conname, curs) if conname else conname, contypedef, is_deferrable, is_deferred) if args.print_queries: print_queries(reindex_query, alter_query, drop_query) if args.dry_run: continue reindex_time = datetime.datetime.now() try:
def test_run(self):
    """End-to-end incremental-replication test.

    Runs discovery, validates the discovered metadata, then runs three sync
    jobs: (1) a full initial sync of both fixture rows, (2) a re-sync that
    must re-emit only the row matching the saved bookmark, and (3) a sync
    after inserting a row with a later replication-key value. Bookmark state
    (no lsn, stable table version, replication key/value) is checked after
    each sync.
    """
    conn_id = connections.ensure_connection(self)

    # run in check mode
    check_job_name = runner.run_check_mode(self, conn_id)

    # verify check exit codes
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    # verify the tap discovered the right streams
    found_catalogs = [
        fc for fc in menagerie.get_catalogs(conn_id)
        if fc['tap_stream_id'] in self.expected_check_streams()
    ]

    self.assertGreaterEqual(
        len(found_catalogs),
        1,
        msg="unable to locate schemas for connection {}".format(conn_id))

    found_catalog_names = set(
        map(lambda c: c['tap_stream_id'], found_catalogs))
    diff = self.expected_check_streams().symmetric_difference(
        found_catalog_names)
    self.assertEqual(
        len(diff),
        0,
        msg="discovered schemas do not match: {}".format(diff))

    # verify that persisted streams have the correct properties
    test_catalog = found_catalogs[0]

    self.assertEqual(test_table_name, test_catalog['stream_name'])
    print("discovered streams are correct")
    print(
        'checking discoverd metadata for public-postgres_full_table_test...'
    )
    md = menagerie.get_annotated_schema(
        conn_id, test_catalog['stream_id'])['metadata']

    # Every column must be discovered with the exact Postgres type name;
    # only the primary key 'id' is inclusion=automatic.
    self.assertEqual(
        {
            ('properties', 'our_varchar'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'character varying'},
            ('properties', 'our_boolean'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'boolean'},
            ('properties', 'our_real'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'real'},
            ('properties', 'our_uuid'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'uuid'},
            ('properties', 'our_bit'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'bit'},
            ('properties', 'OUR TS TZ'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'timestamp with time zone'},
            ('properties', 'our_varchar_10'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'character varying'},
            ('properties', 'our_store'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'hstore'},
            ('properties', 'OUR TIME'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'time without time zone'},
            ('properties', 'our_decimal'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'numeric'},
            ('properties', 'OUR TS'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'timestamp without time zone'},
            ('properties', 'our_jsonb'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'jsonb'},
            ('properties', 'OUR TIME TZ'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'time with time zone'},
            ('properties', 'our_text'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'text'},
            ('properties', 'OUR DATE'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'date'},
            ('properties', 'our_double'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'double precision'},
            (): {'is-view': False, 'schema-name': 'public', 'table-key-properties': ['id'], 'database-name': 'dev', 'row-count': 0},
            ('properties', 'our_bigint'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'bigint'},
            ('properties', 'id'): {'inclusion': 'automatic', 'selected-by-default': True, 'sql-datatype': 'integer'},
            ('properties', 'our_json'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'json'},
            ('properties', 'our_smallint'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'smallint'},
            ('properties', 'our_integer'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'integer'},
            ('properties', 'our_cidr'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'cidr'},
            ('properties', 'our_citext'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'citext'},
            ('properties', 'our_inet'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'inet'},
            ('properties', 'our_mac'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'macaddr'},
            ('properties', 'our_money'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'money'}
        }, metadata.to_map(md))

    # Select the stream with INCREMENTAL replication keyed on "OUR TS TZ".
    additional_md = [{
        "breadcrumb": [],
        "metadata": {
            'replication-method': 'INCREMENTAL',
            'replication-key': 'OUR TS TZ'
        }
    }]
    selected_metadata = connections.select_catalog_and_fields_via_metadata(
        conn_id, test_catalog,
        menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
        additional_md)

    # clear state
    menagerie.set_state(conn_id, {})

    # Sync Job 1
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id,
        self.expected_sync_streams(), self.expected_pks())

    self.assertEqual(record_count_by_stream, {test_table_name: 2})
    records_by_stream = runner.get_records_from_target_output()
    table_version = records_by_stream[test_table_name]['table_version']

    # activate_version followed by one upsert per fixture row
    self.assertEqual(3, len(records_by_stream[test_table_name]['messages']))
    self.assertEqual(
        records_by_stream[test_table_name]['messages'][0]['action'],
        'activate_version')
    self.assertEqual(
        records_by_stream[test_table_name]['messages'][1]['action'],
        'upsert')
    self.assertEqual(
        records_by_stream[test_table_name]['messages'][2]['action'],
        'upsert')

    # verifications about individual records
    for table_name, recs in records_by_stream.items():
        # verify the persisted schema was correct
        self.assertEqual(
            recs['schema'],
            expected_schemas[table_name],
            msg=
            "Persisted schema did not match expected schema for table `{}`."
            .format(table_name))

    # NOTE(review): naming is inverted relative to row ids — expected_record_2
    # holds the id=1 row and expected_record_1 holds the id=2 row.
    expected_record_2 = {
        'our_decimal': decimal.Decimal('1234567890.01'),
        'our_text': 'some text',
        'our_bit': False,
        'our_integer': 44100,
        'our_double': decimal.Decimal('1.1'),
        'id': 1,
        'our_json': '{"secret": 55}',
        'our_boolean': True,
        'our_jsonb': self.rec_1['our_jsonb'],
        'our_bigint': 1000000,
        'OUR TS': '1997-02-02T02:02:02.722184+00:00',
        'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00',
        'OUR TIME': '12:11:10',
        'OUR TIME TZ': '12:11:10-04:00',
        'our_store': {
            "name": "betty",
            "size": "small"
        },
        'our_smallint': 1,
        'OUR DATE': '1998-03-04T00:00:00+00:00',
        'our_varchar': 'our_varchar',
        'our_uuid': self.rec_1['our_uuid'],
        'our_real': decimal.Decimal('1.2'),
        'our_varchar_10': 'varchar_10',
        'our_citext': self.rec_1['our_citext'],
        'our_inet': self.rec_1['our_inet'],
        'our_cidr': self.rec_1['our_cidr'],
        'our_mac': self.rec_1['our_mac'],
        'our_money': '$1,445.57'
    }

    expected_record_1 = {
        'our_decimal': decimal.Decimal('9876543210.02'),
        'OUR TIME': '10:09:08',
        'our_text': 'some text 2',
        'our_bit': True,
        'our_integer': 44101,
        'our_double': decimal.Decimal('1.1'),
        'id': 2,
        'our_json': '{"nymn": 77}',
        'our_boolean': True,
        'our_jsonb': '{"burgers": "good++"}',
        'our_bigint': 1000001,
        'OUR TIME TZ': '10:09:08-04:00',
        'our_store': {
            "name": "betty",
            "dances": "floor"
        },
        'OUR TS TZ': '1987-03-03T08:03:03.733184+00:00',
        'our_smallint': 2,
        'OUR DATE': '1964-07-01T00:00:00+00:00',
        'our_varchar': 'our_varchar 2',
        'OUR TS': '1987-03-03T03:03:03.733184+00:00',
        'our_uuid': self.rec_2['our_uuid'],
        'our_real': decimal.Decimal('1.2'),
        'our_varchar_10': 'varchar_10',
        'our_citext': self.rec_2['our_citext'],
        'our_inet': self.rec_2['our_inet'],
        'our_cidr': self.rec_2['our_cidr'],
        'our_mac': self.rec_2['our_mac'],
        'our_money': None
    }

    actual_record_1 = records_by_stream[test_table_name]['messages'][1]
    self.assertEqual(set(actual_record_1['data'].keys()),
                     set(expected_record_1.keys()),
                     msg="keys for expected_record_1 are wrong: {}".format(
                         set(actual_record_1.keys()).symmetric_difference(
                             set(expected_record_1.keys()))))

    for k, v in actual_record_1['data'].items():
        self.assertEqual(actual_record_1['data'][k],
                         expected_record_1[k],
                         msg="{} != {} for key {}".format(
                             actual_record_1['data'][k],
                             expected_record_1[k], k))

    actual_record_2 = records_by_stream[test_table_name]['messages'][2]
    # NOTE(review): this keys assertion reads actual_record_1, not
    # actual_record_2 — likely a copy/paste slip; confirm intent.
    self.assertEqual(set(actual_record_1['data'].keys()),
                     set(expected_record_2.keys()),
                     msg="keys for expected_record_2 are wrong: {}".format(
                         set(actual_record_2.keys()).symmetric_difference(
                             set(expected_record_2.keys()))))

    for k, v in actual_record_2['data'].items():
        self.assertEqual(actual_record_2['data'][k],
                         expected_record_2[k],
                         msg="{} != {} for key {}".format(
                             actual_record_2['data'][k],
                             expected_record_2[k], k))

    print("records are correct")

    # verify state and bookmarks
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_incremental_replication_test']

    self.assertIsNone(state['currently_syncing'],
                      msg="expected state's currently_syncing to be None")
    self.assertIsNone(
        bookmark.get('lsn'),
        msg=
        "expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using incremental replication"
    )

    self.assertEqual(
        bookmark['version'],
        table_version,
        msg="expected bookmark for stream ROOT-CHICKEN to match version")

    #----------------------------------------------------------------------
    # invoke the sync job AGAIN and get 1 record(the one matching the bookmark)
    #----------------------------------------------------------------------
    #Sync Job 2
    sync_job_name = runner.run_sync_mode(self, conn_id)

    # verify tap and target exit codes
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    self.assertEqual(record_count_by_stream, {test_table_name: 1})
    records_by_stream = runner.get_records_from_target_output()

    self.assertEqual(2, len(records_by_stream[test_table_name]['messages']))
    self.assertEqual(
        records_by_stream[test_table_name]['messages'][0]['action'],
        'activate_version')
    self.assertEqual(
        records_by_stream[test_table_name]['messages'][1]['action'],
        'upsert')

    #table version did NOT change
    self.assertEqual(records_by_stream[test_table_name]['table_version'],
                     table_version)

    # verifications about individual records
    for stream, recs in records_by_stream.items():
        # verify the persisted schema was correct
        self.assertEqual(
            recs['schema'],
            expected_schemas[stream],
            msg=
            "Persisted schema did not match expected schema for stream `{}`."
            .format(stream))

    # Re-synced record must equal the bookmarked (id=1) row.
    actual_record_3 = records_by_stream[test_table_name]['messages'][1]
    self.assertEqual(set(actual_record_3['data'].keys()),
                     set(expected_record_2.keys()),
                     msg="keys for expected_record_1 are wrong: {}".format(
                         set(actual_record_3.keys()).symmetric_difference(
                             set(expected_record_2.keys()))))

    expected_record_3 = expected_record_2
    for k, v in actual_record_3['data'].items():
        self.assertEqual(actual_record_3['data'][k],
                         expected_record_3[k],
                         msg="{} != {} for key {}".format(
                             actual_record_3['data'][k],
                             expected_record_3[k], k))
    print("records are correct")

    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_incremental_replication_test']

    self.assertIsNone(
        bookmark.get('lsn'),
        msg=
        "expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using incremental replication"
    )

    self.assertEqual(
        bookmark['version'],
        table_version,
        msg="expected bookmark for stream ROOT-CHICKEN to match version")

    self.assertEqual(bookmark['replication_key'], 'OUR TS TZ')
    self.assertEqual(bookmark['replication_key_value'],
                     '1997-02-02T07:02:02.722184+00:00')

    #----------------------------------------------------------------------
    # insert new record with higher replication_key value and invoke the sync job AGAIN and get new record
    #----------------------------------------------------------------------
    our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
    nyc_tz = pytz.timezone('America/New_York')
    our_ts_tz = nyc_tz.localize(our_ts)
    our_time = datetime.time(12, 11, 10)
    our_time_tz = our_time.isoformat() + "-04:00"
    our_date = datetime.date(1999, 9, 9)
    my_uuid = str(uuid.uuid1())

    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            self.rec_3 = {
                'our_varchar': "our_varchar 4",
                'our_varchar_10': "varchar_3",
                'our_text': "some text 4",
                'our_integer': 55200,
                'our_smallint': 1,
                'our_bigint': 100000,
                'our_decimal': decimal.Decimal('1234567899.99'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps('some string'),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
            }
            insert_record(cur, test_table_name, self.rec_3)

    #Sync Job 3
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(), self.expected_pks())
    self.assertEqual(record_count_by_stream, {test_table_name: 2})
    records_by_stream = runner.get_records_from_target_output()

    self.assertEqual(3, len(records_by_stream[test_table_name]['messages']))

    #table version did NOT change
    self.assertEqual(records_by_stream[test_table_name]['table_version'],
                     table_version)

    self.assertEqual(
        records_by_stream[test_table_name]['messages'][1]['action'],
        'upsert')
    self.assertEqual(
        records_by_stream[test_table_name]['messages'][2]['action'],
        'upsert')

    # verifications about individual records
    for table_name, recs in records_by_stream.items():
        # verify the persisted schema was correct
        self.assertEqual(
            recs['schema'],
            expected_schemas[table_name],
            msg=
            "Persisted schema did not match expected schema for table `{}`."
            .format(table_name))

    #check 1st record
    actual_record_4 = records_by_stream[test_table_name]['messages'][1]
    expected_record_4 = expected_record_2
    self.assertEqual(set(actual_record_4['data'].keys()),
                     set(expected_record_1.keys()),
                     msg="keys for expected_record_4 are wrong: {}".format(
                         set(actual_record_4.keys()).symmetric_difference(
                             set(expected_record_1.keys()))))

    for k, v in actual_record_4['data'].items():
        self.assertEqual(actual_record_4['data'][k],
                         expected_record_4[k],
                         msg="{} != {} for key {}".format(
                             actual_record_4['data'][k],
                             expected_record_4[k], k))

    #check 2nd record
    actual_record_5 = records_by_stream[test_table_name]['messages'][2]
    expected_record_5 = {
        'our_decimal': decimal.Decimal('1234567899.99'),
        'our_text': 'some text 4',
        'our_bit': False,
        'our_integer': 55200,
        'our_double': decimal.Decimal('1.1'),
        'id': 3,
        'our_json': self.rec_3['our_json'],
        'our_boolean': True,
        'our_jsonb': self.rec_3['our_jsonb'],
        'our_bigint': 100000,
        'OUR TS': '2111-01-01T12:12:12.222111+00:00',
        'OUR TS TZ': '2111-01-01T17:12:12.222111+00:00',
        'OUR TIME': '12:11:10',
        'OUR TIME TZ': '12:11:10-04:00',
        'our_store': {
            "name": "betty",
            "size": "small"
        },
        'our_smallint': 1,
        'OUR DATE': '1999-09-09T00:00:00+00:00',
        'our_varchar': 'our_varchar 4',
        'our_uuid': self.rec_3['our_uuid'],
        'our_real': decimal.Decimal('1.2'),
        'our_varchar_10': 'varchar_3',
        'our_citext': 'cyclops 3',
        'our_cidr': '192.168.101.128/25',
        'our_inet': '192.168.101.128/24',
        'our_mac': '08:00:2b:01:02:04',
        'our_money': None
    }

    self.assertEqual(set(actual_record_5['data'].keys()),
                     set(expected_record_5.keys()),
                     msg="keys for expected_record_5 are wrong: {}".format(
                         set(actual_record_5.keys()).symmetric_difference(
                             set(expected_record_5.keys()))))

    for k, v in actual_record_5['data'].items():
        self.assertEqual(actual_record_5['data'][k],
                         expected_record_5[k],
                         msg="{} != {} for key {}".format(
                             actual_record_5['data'][k],
                             expected_record_5[k], k))
    print("records are correct")

    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_incremental_replication_test']

    self.assertIsNone(
        bookmark.get('lsn'),
        msg=
        "expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using incremental replication"
    )

    self.assertEqual(
        bookmark['version'],
        table_version,
        msg="expected bookmark for stream ROOT-CHICKEN to match version")

    self.assertEqual(bookmark['replication_key'], 'OUR TS TZ')
    self.assertEqual(bookmark['replication_key_value'],
                     '2111-01-01T17:12:12.222111+00:00')
def canonicalized_table_name(conn_cursor, schema, table):
    """Return ``schema.table`` with both identifiers safely quoted for Postgres."""
    # NOTE(review): some call sites in related test files pass the cursor
    # LAST (schema, table, cur) — verify against the callers of this helper.
    parts = [quote_ident(part, conn_cursor) for part in (schema, table)]
    return ".".join(parts)
def number_generator(
    tenant_id: str,
    sequence_name: str,
    number_format: str,
    checksum_salt: str,
    checksum_algorithm: str,
    checksum_length: int,
    checksum_format: str,
) -> str:
    """Generate the next formatted number from a per-tenant DB sequence.

    Fetches ``nextval`` from ``<sequence_name>_<tenant_id>``, creating the
    sequence on demand if it does not exist yet, then formats the value with
    ``number_format`` and optionally appends an HMAC-based checksum.

    :param tenant_id: tenant discriminator appended to the sequence name.
    :param sequence_name: base name of the Postgres sequence.
    :param number_format: %-style format using ``year``/``month``/``number``.
    :param checksum_salt: HMAC key (str is encoded as UTF-8).
    :param checksum_algorithm: hashlib algorithm name for the HMAC digest.
    :param checksum_length: number of checksum digits; 0 disables checksums.
    :param checksum_format: %-style format using ``number``/``checksum``.
    :raises ValueError: if ``tenant_id`` is empty or no value can be obtained.
    """
    if not tenant_id:
        raise ValueError("Tenant-ID not given")

    sequence_name = f'{sequence_name}_{tenant_id}'

    val = None
    i = 0
    # First attempt may hit a missing sequence; create it and retry once.
    while not val and i < 2:
        try:
            with connection.cursor() as cursor:
                with atomic():
                    cursor.execute(
                        "SELECT nextval(%s);",
                        (quote_ident(sequence_name, cursor.cursor), ))
                    val = cursor.fetchone()
                    break
        except (ProgrammingError, InternalError) as error:
            # Only "relation does not exist" is recoverable here.
            if error.__cause__.pgcode != UNDEFINED_TABLE:
                raise
            i += 1
            with connection.cursor() as cursor:
                with atomic():
                    # Identifier is quoted, so %-interpolation is safe here.
                    cursor.execute(
                        "CREATE SEQUENCE IF NOT EXISTS %s START 1;" %
                        quote_ident(sequence_name, cursor.cursor))

    if not val:
        raise ValueError('cannot_obtain_next_number')

    nextnumber = int(val[0])
    now = timezone.now()
    number = number_format % {
        'year': now.year,
        'month': now.month,
        'number': nextnumber,
    }

    if checksum_length > 0:
        # hmac.new requires a bytes-like key; the original passed the str
        # salt straight through, which raises TypeError at runtime.
        salt_key = (checksum_salt.encode('utf-8')
                    if isinstance(checksum_salt, str) else checksum_salt)
        checksum_value = hmac.new(salt_key,
                                  bytes(number, 'utf-8'),
                                  digestmod=checksum_algorithm)
        checksum_int = int(checksum_value.hexdigest(), 16)
        checksum_number = checksum_int % 10**checksum_length
        checksum = '%0*d' % (checksum_length, checksum_number)
        number = checksum_format % {
            'number': number,
            'checksum': checksum,
        }
    return number
# Recreate the PyBossa database from scratch: connect to the maintenance
# database ("postgres"), drop the configured database if present, then
# recreate it with UTF-8 encoding from template0.
import os

import dj_database_url
import psycopg2
import psycopg2.extensions as ext

# BUG FIX: os.environ was used without a visible `import os`; added above.
DATABASE_URL = os.environ.get('PYBOSSA_DATABASE_URL',
                              'postgresql://*****:*****@localhost/pybossa')
config = dj_database_url.parse(DATABASE_URL, conn_max_age=60)

with psycopg2.connect(host=config['HOST'],
                      port=config['PORT'],
                      dbname='postgres',
                      user=config['USER'],
                      password=config['PASSWORD']) as conn:
    # quote_ident guards against injection through the configured DB name;
    # identifiers cannot be bound as query parameters in DDL.
    DROP_DB_SQL = """ DROP DATABASE IF EXISTS {0}; """.format(
        ext.quote_ident(config['NAME'], conn))
    CREATE_DB_SQL = """ CREATE DATABASE {0} ENCODING 'UTF-8' LC_COLLATE 'en_US.UTF-8' LC_CTYPE 'en_US.UTF-8' TEMPLATE template0; """.format(
        ext.quote_ident(config['NAME'], conn))
    # autocommit mode required for create and drop
    conn.set_session(autocommit=True)
    with conn.cursor() as cur:
        # BUG FIX: Python-2-only `print X` statements replaced with the
        # function form, which behaves identically for a single argument
        # on both Python 2 and 3.
        print(DROP_DB_SQL)
        cur.execute(DROP_DB_SQL)
        print(CREATE_DB_SQL)
        cur.execute(CREATE_DB_SQL)
def test_unicode_ident(self):
    """quote_ident must wrap a non-ASCII identifier in double quotes."""
    ident = "\u2603"  # snowman character
    expected = '"{}"'.format(ident)
    self.assertEqual(quote_ident(ident, self.conn), expected)
def setUp(self):
    """Provision the test database for a logical-replication test run.

    Recreates the 'stitch' replication slot (wal2json), drops any stale
    copy of the test table, ensures the hstore/citext extensions and the
    ALIGNMENT enum exist, creates a table exercising a wide range of
    Postgres types, and inserts four fixture rows kept on
    self.rec_1 .. self.rec_4 for later assertions.

    NOTE(review): destructive — assumes exclusive use of the test DB.
    """
    db_utils.ensure_environment_variables_set()
    db_utils.ensure_db(test_db)
    self.maxDiff = None  # show full diffs on assertion failures
    with db_utils.get_test_connection(test_db) as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # recreate the 'stitch' replication slot from scratch on a
            # separate (logical replication) connection
            cur.execute(""" SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'stitch') """)
            old_slot = cur.fetchone()[0]
            with db_utils.get_test_connection(test_db, True) as conn2:
                with conn2.cursor() as cur2:
                    if old_slot:
                        cur2.drop_replication_slot("stitch")
                    cur2.create_replication_slot('stitch', output_plugin='wal2json')
            # drop the test table if a previous run left it behind
            # (cur.execute returns None; the flag comes from fetchone)
            old_table = cur.execute(
                """SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s);""",
                [test_schema_name, test_table_name])
            old_table = cur.fetchone()[0]
            if old_table:
                cur.execute("DROP TABLE {}".format(
                    canonicalized_table_name(test_schema_name, test_table_name, cur)))
            cur = conn.cursor()
            # ensure the extensions and enum type the table depends on exist
            cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """)
            if cur.fetchone()[0] is None:
                cur.execute(""" CREATE EXTENSION hstore; """)
            cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""")
            cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """)
            cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """)
            create_table_sql = """ CREATE TABLE {} (id SERIAL PRIMARY KEY, our_varchar VARCHAR, our_varchar_10 VARCHAR(10), our_text TEXT, our_text_2 TEXT, our_integer INTEGER, our_smallint SMALLINT, our_bigint BIGINT, our_decimal NUMERIC(12,2), "OUR TS" TIMESTAMP WITHOUT TIME ZONE, "OUR TS TZ" TIMESTAMP WITH TIME ZONE, "OUR TIME" TIME WITHOUT TIME ZONE, "OUR TIME TZ" TIME WITH TIME ZONE, "OUR DATE" DATE, our_double DOUBLE PRECISION, our_real REAL, our_boolean BOOLEAN, our_bit BIT(1), our_json JSON, our_jsonb JSONB, our_uuid UUID, our_store HSTORE, our_citext CITEXT, our_cidr cidr, our_inet inet, our_mac macaddr, our_alignment_enum ALIGNMENT, our_money money) """.format(
                canonicalized_table_name(test_schema_name, test_table_name, cur))
            cur.execute(create_table_sql)
            #insert fixture data 1
            our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1998, 3, 4)
            my_uuid = str(uuid.uuid1())
            # quote_ident keys preserve the case/space-sensitive column names
            self.rec_1 = {
                'our_varchar': "our_varchar",
                'our_varchar_10': "varchar_10",
                'our_text': "some text",
                'our_text_2': "NOT SELECTED",
                'our_integer': 44100,
                'our_smallint': 1,
                'our_bigint': 1000000,
                'our_decimal': decimal.Decimal('1234567890.01'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': 1.1,
                'our_real': 1.2,
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps({'secret': 55}),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'maGICKal',
                'our_cidr': '192.168.100.128/25',
                'our_inet': '192.168.100.128/24',
                'our_mac': '08:00:2b:01:02:03',
                'our_alignment_enum': 'bad'
            }
            insert_record(cur, test_table_name, self.rec_1)
            #insert fixture data 2
            our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(10, 9, 8)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1964, 7, 1)
            my_uuid = str(uuid.uuid1())
            self.rec_2 = {
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_text_2': "NOT SELECTED",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': 1.1,
                'our_real': 1.2,
                'our_boolean': True,
                'our_bit': '1',
                'our_json': json.dumps({'nymn': 77}),
                'our_jsonb': json.dumps({'burgers': 'good++'}),
                'our_uuid': my_uuid,
                'our_store': 'dances=>"floor",name=>"betty"',
                'our_citext': 'maGICKal 2',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
            }
            insert_record(cur, test_table_name, self.rec_2)
            #insert fixture data 3
            our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1998, 3, 4)
            my_uuid = str(uuid.uuid1())
            self.rec_3 = {
                'our_varchar': "our_varchar 3",
                'our_varchar_10': "varchar_10",
                'our_text': "some text",
                'our_text_2': "NOT SELECTED",
                'our_integer': 44100,
                'our_smallint': 1,
                'our_bigint': 1000000,
                'our_decimal': decimal.Decimal('1234567890.01'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': 1.1,
                'our_real': 1.2,
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps({'secret': 55}),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'maGICKal',
                'our_cidr': '192.168.100.128/25',
                'our_inet': '192.168.100.128/24',
                'our_mac': '08:00:2b:01:02:03',
                'our_alignment_enum': 'bad'
            }
            insert_record(cur, test_table_name, self.rec_3)
            #insert fixture data 4
            our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(10, 9, 8)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1964, 7, 1)
            my_uuid = str(uuid.uuid1())
            self.rec_4 = {
                'our_varchar': "our_varchar 4",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_text_2': "NOT SELECTED",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': 1.1,
                'our_real': 1.2,
                'our_boolean': True,
                'our_bit': '1',
                'our_json': json.dumps({'nymn': 77}),
                'our_jsonb': json.dumps({'burgers': 'good++'}),
                'our_uuid': my_uuid,
                'our_store': 'dances=>"floor",name=>"betty"',
                'our_citext': 'maGICKal 2',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
            }
            insert_record(cur, test_table_name, self.rec_4)
by underscores), and the common name (also delimited by underscores)') genome = sys.argv[1] bad_tax_name = sys.argv[2] bad_com_name = sys.argv[3] # remove underscores from tax_name and com_name tax_name = re.sub('_', ' ', bad_tax_name) com_name = re.sub('_', ' ', bad_com_name) # get connection to database conn = psycopg2.connect(dbname = 'iaod', user = '******') cur = conn.cursor() # many of the genome assemblies have hyphens in them, so we have to escape them escaped_genome = quote_ident(genome, cur) # remove existing data for this genome assembly and/or create new tables for it print(f'Updating SQL tables for {tax_name} ({genome}).') update_tables(escaped_genome, cur) # make dictionary of gene IDs and gene symbols; batch input file because it may # get really long with some genomes print(f'Reading gene symbols.') symbols = {} with open(f'info/{genome}_gene_symbols.tsv', 'r') as in_file: for batch in iter(lambda: tuple(islice(in_file, 10000)), ()): for line in batch: id_symbol_pair = line.rstrip('\n').split('\t') symbols[id_symbol_pair[0]] = id_symbol_pair[1]
def as_string(self, context):
    """Return the wrapped identifier parts quoted individually and joined with dots."""
    quoted_parts = [ext.quote_ident(part, context) for part in self._wrapped]
    return '.'.join(quoted_parts)
def start_replication(self, slot_name=None, slot_type=None, start_lsn=0,
                      timeline=0, options=None, decode=False):
    """Start replication stream.

    Builds and issues a START_REPLICATION command for a logical or
    physical replication slot.

    :param slot_name: slot to stream from (required for logical slots)
    :param slot_type: REPLICATION_LOGICAL or REPLICATION_PHYSICAL;
        defaults to the connection's replication type
    :param start_lsn: starting LSN, either an int or an "XXX/XXX" string
    :param timeline: physical-replication timeline (0 = current)
    :param options: dict of output-plugin options (logical slots only)
    :param decode: passed through to start_replication_expert
    :raises psycopg2.ProgrammingError: on invalid argument combinations
    """
    command = "START_REPLICATION "

    if slot_type is None:
        slot_type = self.connection.replication_type

    if slot_type == REPLICATION_LOGICAL:
        if slot_name:
            command += "SLOT %s " % quote_ident(slot_name, self)
        else:
            raise psycopg2.ProgrammingError(
                "slot name is required for logical replication")

        command += "LOGICAL "

    elif slot_type == REPLICATION_PHYSICAL:
        if slot_name:
            command += "SLOT %s " % quote_ident(slot_name, self)
        # don't add "PHYSICAL", before 9.4 it was just START_REPLICATION XXX/XXX

    else:
        raise psycopg2.ProgrammingError(
            "unrecognized replication type: %s" % repr(slot_type))

    if isinstance(start_lsn, str):
        # normalize a textual "XXX/XXX" LSN to canonical hex form
        lsn = start_lsn.split('/')
        lsn = "%X/%08X" % (int(lsn[0], 16), int(lsn[1], 16))
    else:
        # a 64-bit integer LSN: split into high/low 32-bit halves
        lsn = "%X/%08X" % ((start_lsn >> 32) & 0xFFFFFFFF,
                           start_lsn & 0xFFFFFFFF)

    command += lsn

    if timeline != 0:
        if slot_type == REPLICATION_LOGICAL:
            raise psycopg2.ProgrammingError(
                "cannot specify timeline for logical replication")

        command += " TIMELINE %d" % timeline

    if options:
        if slot_type == REPLICATION_PHYSICAL:
            raise psycopg2.ProgrammingError(
                "cannot specify output plugin options for physical replication"
            )

        command += " ("
        # BUG FIX: dict.iteritems() does not exist on Python 3 and would
        # raise AttributeError; dict.items() works on both Python 2 and 3.
        for k, v in options.items():
            if not command.endswith('('):
                command += ", "
            command += "%s %s" % (quote_ident(k, self), _A(str(v)))
        command += ")"

    self.start_replication_expert(command, decode=decode)
def setUp(self):
    """Build a two-table fixture plus a joining view for discovery tests.

    Validates the TAP_POSTGRES_* environment, drops stale tables/view,
    creates table 1 (id/name/size), table 2 (fk_id/age), and a view
    joining them, then inserts one row into each (self.rec_1, self.rec_2),
    wiring rec_2.fk_id to rec_1's generated id.
    """
    db_utils.ensure_db()
    self.maxDiff = None  # show full diffs on assertion failures
    creds = {}
    # fail fast if any required connection env var is unset
    missing_envs = [
        x for x in [
            os.getenv('TAP_POSTGRES_HOST'),
            os.getenv('TAP_POSTGRES_USER'),
            os.getenv('TAP_POSTGRES_PASSWORD'),
            os.getenv('TAP_POSTGRES_DBNAME'),
            os.getenv('TAP_POSTGRES_PORT')
        ] if x == None
    ]
    if len(missing_envs) != 0:
        #pylint: disable=line-too-long
        raise Exception(
            "set TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_USER, TAP_POSTGRES_PASSWORD, TAP_POSTGRES_PORT"
        )
    with db_utils.get_test_connection() as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # drop any leftovers from a previous run
            # (cur.execute returns None; the flag comes from fetchone)
            for table in [test_table_name_1, test_table_name_2]:
                old_table = cur.execute(
                    """SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s)""",
                    [test_schema_name, table])
                old_table = cur.fetchone()[0]
                if old_table:
                    cur.execute("DROP TABLE {} CASCADE".format(
                        canonicalized_table_name(test_schema_name, table, cur)))
            cur.execute("""DROP VIEW IF EXISTS {} """.format(
                quote_ident(test_view, cur)))
            # create the two base tables and the joining view
            cur.execute("""CREATE TABLE {} (id SERIAL PRIMARY KEY, name VARCHAR, size VARCHAR) """.format(
                canonicalized_table_name(test_schema_name, test_table_name_1, cur)))
            cur.execute("""CREATE TABLE {} (fk_id bigint, age integer) """.format(
                canonicalized_table_name(test_schema_name, test_table_name_2, cur)))
            cur.execute("""CREATE VIEW {} AS (SELECT * FROM {} join {} on {}.id = {}.fk_id )""".format(
                quote_ident(test_view, cur),
                canonicalized_table_name(test_schema_name, test_table_name_1, cur),
                canonicalized_table_name(test_schema_name, test_table_name_2, cur),
                canonicalized_table_name(test_schema_name, test_table_name_1, cur),
                canonicalized_table_name(test_schema_name, test_table_name_2, cur)))
            # seed one row per table; rec_2 references rec_1's generated id
            self.rec_1 = {'name': 'fred', 'size': 'big'}
            insert_record(cur, test_table_name_1, self.rec_1)
            cur.execute("SELECT id FROM {}".format(
                canonicalized_table_name(test_schema_name, test_table_name_1, cur)))
            fk_id = cur.fetchone()[0]
            self.rec_2 = {'fk_id': fk_id, 'age': 99}
            insert_record(cur, test_table_name_2, self.rec_2)
def setUp(self):
    """Build a two-table fixture plus a joining view for discovery tests.

    Same fixture as the sibling setUp above, but relies on
    ensure_environment_variables_set() for env validation: drops stale
    tables/view, creates table 1 (id/name/size), table 2 (fk_id/age) and
    a view joining them, then inserts one row each (self.rec_1,
    self.rec_2), wiring rec_2.fk_id to rec_1's generated id.
    """
    db_utils.ensure_environment_variables_set()
    db_utils.ensure_db()
    self.maxDiff = None  # show full diffs on assertion failures
    with db_utils.get_test_connection() as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # drop any leftovers from a previous run
            # (cur.execute returns None; the flag comes from fetchone)
            for table in [test_table_name_1, test_table_name_2]:
                old_table = cur.execute(
                    """SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s)""",
                    [test_schema_name, table])
                old_table = cur.fetchone()[0]
                if old_table:
                    cur.execute("DROP TABLE {} CASCADE".format(
                        canonicalized_table_name(test_schema_name, table, cur)))
            cur.execute("""DROP VIEW IF EXISTS {} """.format(
                quote_ident(test_view, cur)))
            # create the two base tables and the joining view
            cur.execute("""CREATE TABLE {} (id SERIAL PRIMARY KEY, name VARCHAR, size VARCHAR) """.format(
                canonicalized_table_name(test_schema_name, test_table_name_1, cur)))
            cur.execute("""CREATE TABLE {} (fk_id bigint, age integer) """.format(
                canonicalized_table_name(test_schema_name, test_table_name_2, cur)))
            cur.execute("""CREATE VIEW {} AS (SELECT * FROM {} join {} on {}.id = {}.fk_id )""".format(
                quote_ident(test_view, cur),
                canonicalized_table_name(test_schema_name, test_table_name_1, cur),
                canonicalized_table_name(test_schema_name, test_table_name_2, cur),
                canonicalized_table_name(test_schema_name, test_table_name_1, cur),
                canonicalized_table_name(test_schema_name, test_table_name_2, cur)))
            # seed one row per table; rec_2 references rec_1's generated id
            self.rec_1 = {'name': 'fred', 'size': 'big'}
            insert_record(cur, test_table_name_1, self.rec_1)
            cur.execute("SELECT id FROM {}".format(
                canonicalized_table_name(test_schema_name, test_table_name_1, cur)))
            fk_id = cur.fetchone()[0]
            self.rec_2 = {'fk_id': fk_id, 'age': 99}
            insert_record(cur, test_table_name_2, self.rec_2)
def test_identifier(self):
    """Identifiers get double-quoted; embedded double quotes are doubled."""
    from psycopg2.extensions import quote_ident
    cases = [
        ('blah-blah', '"blah-blah"'),
        ('quote"inside', '"quote""inside"'),
    ]
    for raw, expected in cases:
        self.assertEqual(quote_ident(raw, self.conn), expected)
def test_run(self):
    """End-to-end INCREMENTAL replication test over three sync runs.

    Sync 1: discover, select the stream with replication-key 'OUR TS TZ',
    replicate the three seeded rows, and check the saved bookmark.
    Sync 2: after inserts/updates/a delete on either side of the bookmark,
    only records at-or-above the bookmark are replicated.
    Sync 3: after deleting a low-key record, only the previously
    bookmarked record is re-synced and the bookmark is unchanged.
    """
    conn_id = connections.ensure_connection(self)

    # run in check mode and verify exit codes
    check_job_name = runner.run_check_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, check_job_name)
    menagerie.verify_check_exit_status(self, exit_status, check_job_name)

    # verify basics of discovery are consistent with expectations...
    # verify discovery produced (at least) 1 expected catalog
    found_catalogs = [
        found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
        if found_catalog['tap_stream_id'] in self.expected_check_streams()
    ]
    self.assertGreaterEqual(len(found_catalogs), 1)

    # verify the tap discovered the expected streams
    found_catalog_names = {
        catalog['tap_stream_id']
        for catalog in found_catalogs
    }
    self.assertSetEqual(self.expected_check_streams(), found_catalog_names)

    # verify that persisted streams have the correct properties
    test_catalog = found_catalogs[0]
    self.assertEqual(test_table_name, test_catalog['stream_name'])
    print("discovered streams are correct")

    # perform table selection
    print('selecting {} and all fields within the table'.format(
        test_table_name))
    schema_and_metadata = menagerie.get_annotated_schema(
        conn_id, test_catalog['stream_id'])
    additional_md = [{
        "breadcrumb": [],
        "metadata": {
            'replication-method': 'INCREMENTAL',
            'replication-key': 'OUR TS TZ'
        }
    }]
    _ = connections.select_catalog_and_fields_via_metadata(
        conn_id, test_catalog, schema_and_metadata, additional_md)

    # clear state
    menagerie.set_state(conn_id, {})

    # run sync job 1 and verify exit codes
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # get records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(),
        self.expected_primary_keys())
    records_by_stream = runner.get_records_from_target_output()
    table_version = records_by_stream[test_table_name]['table_version']
    messages = records_by_stream[test_table_name]['messages']

    # verify the execpted number of records were replicated
    self.assertEqual(3, record_count_by_stream[test_table_name])

    # verify the message actions match expectations
    self.assertEqual(4, len(messages))
    self.assertEqual('activate_version', messages[0]['action'])
    self.assertEqual('upsert', messages[1]['action'])
    self.assertEqual('upsert', messages[2]['action'])
    self.assertEqual('upsert', messages[3]['action'])

    # verify the persisted schema matches expectations
    self.assertEqual(expected_schemas[test_table_name],
                     records_by_stream[test_table_name]['schema'])

    # verify replicated records match expectations
    self.assertDictEqual(self.expected_records[0], messages[1]['data'])
    self.assertDictEqual(self.expected_records[1], messages[2]['data'])
    self.assertDictEqual(self.expected_records[2], messages[3]['data'])

    # verify records are in ascending order by replication-key value
    expected_replication_key = list(
        self.expected_replication_keys()[test_table_name])[0]
    self.assertLess(messages[1]['data'][expected_replication_key],
                    messages[2]['data'][expected_replication_key])
    self.assertLess(messages[2]['data'][expected_replication_key],
                    messages[3]['data'][expected_replication_key])
    print("records are correct")

    # grab bookmarked state
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_incremental_replication_test']

    # verify state and bookmarks meet expectations
    self.assertIsNone(state['currently_syncing'])
    self.assertIsNone(bookmark.get('lsn'))
    self.assertEqual(table_version, bookmark['version'])
    self.assertEqual(expected_replication_key, bookmark['replication_key'])
    self.assertEqual(self.expected_records[2][expected_replication_key],
                     bookmark['replication_key_value'])

    #----------------------------------------------------------------------
    # invoke the sync job AGAIN following various manipulations to the data
    #----------------------------------------------------------------------

    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # NB | We will perform the following actions prior to the next sync:
            #      [Action (EXPECTED RESULT)]
            #      Insert a record with a lower replication-key value (NOT REPLICATED)
            #      Insert a record with a higher replication-key value (REPLICATED)
            #      Insert a record with a higher replication-key value and...
            #      Delete it (NOT REPLICATED)
            #      Update a record with a higher replication-key value (REPLICATED)
            #      Update a record with a lower replication-key value (NOT REPLICATED)

            # inserting...
            # a record with a replication-key value that is lower than the previous bookmark
            nyc_tz = pytz.timezone('America/New_York')
            our_time_offset = "-04:00"
            our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(6, 6, 6)
            our_time_tz = our_time.isoformat() + our_time_offset
            our_date = datetime.date(1970, 7, 1)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '1',
                'our_json': json.dumps({'nymn': 77}),
                'our_jsonb': json.dumps({'burgers': 'good++'}),
                'our_uuid': my_uuid,
                'our_citext': 'cyclops 2',
                'our_store': 'dances=>"floor",name=>"betty"',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': '$0.98789'
            })
            self.expected_records.append({
                'id': 4,
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'OUR DATE': '1970-07-01T00:00:00+00:00',
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': True,
                'our_json': '{"nymn": 77}',
                'our_jsonb': '{"burgers": "good++"}',
                'our_uuid': self.inserted_records[-1]['our_uuid'],
                'our_citext': self.inserted_records[-1]['our_citext'],
                'our_store': {
                    "name": "betty",
                    "dances": "floor"
                },
                'our_cidr': self.inserted_records[-1]['our_cidr'],
                'our_inet': self.inserted_records[-1]['our_inet'],
                'our_mac': self.inserted_records[-1]['our_mac'],
                'our_money': '$0.99'
            })

            # a record with a replication-key value that is higher than the previous bookmark
            our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1999, 9, 9)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar 4",
                'our_varchar_10': "varchar_3",
                'our_text': "some text 4",
                'our_integer': 55200,
                'our_smallint': 1,
                'our_bigint': 100000,
                'our_decimal': decimal.Decimal('1234567899.99'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps('some string'),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('1234567899.99'),
                'our_text': 'some text 4',
                'our_bit': False,
                'our_integer': 55200,
                'our_double': decimal.Decimal('1.1'),
                'id': 5,
                'our_json': self.inserted_records[-1]['our_json'],
                'our_boolean': True,
                'our_jsonb': self.inserted_records[-1]['our_jsonb'],
                'our_bigint': 100000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1999-09-09T00:00:00+00:00',
                'our_varchar': 'our_varchar 4',
                'our_uuid': self.inserted_records[-1]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_3',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None
            })

            # a record with a replication-key value that is higher than the previous bookmark (to be deleted)
            our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1999, 9, 9)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar 4",
                'our_varchar_10': "varchar_3",
                'our_text': "some text 4",
                'our_integer': 55200,
                'our_smallint': 1,
                'our_bigint': 100000,
                'our_decimal': decimal.Decimal('1234567899.99'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps('some string'),
                'our_jsonb': json.dumps(['burgers are good']),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None,
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('1234567899.99'),
                'our_text': 'some text 4',
                'our_bit': False,
                'our_integer': 55200,
                'our_double': decimal.Decimal('1.1'),
                'id': 6,
                'our_json': self.inserted_records[-1]['our_json'],
                'our_boolean': True,
                'our_jsonb': self.inserted_records[-1]['our_jsonb'],
                'our_bigint': 100000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1999-09-09T00:00:00+00:00',
                'our_varchar': 'our_varchar 4',
                'our_uuid': self.inserted_records[-1]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_3',
                'our_citext': 'cyclops 3',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None
            })
            db_utils.insert_record(cur, test_table_name,
                                   self.inserted_records[3])
            db_utils.insert_record(cur, test_table_name,
                                   self.inserted_records[4])
            db_utils.insert_record(cur, test_table_name,
                                   self.inserted_records[5])

            # update a record with a replication-key value that is higher than the previous bookmark
            canon_table_name = db_utils.canonicalized_table_name(
                cur, test_schema_name, test_table_name)
            record_pk = 1
            our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            updated_data = {
                "OUR TS TZ": our_ts_tz,
                "our_double": decimal.Decimal("6.6"),
                "our_money": "$0.00"
            }
            self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(
                our_ts_tz)
            self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
            self.expected_records[0]["our_money"] = "$0.00"
            db_utils.update_record(cur, canon_table_name, record_pk,
                                   updated_data)

            # update a record with a replication-key value that is lower than the previous bookmark
            canon_table_name = db_utils.canonicalized_table_name(
                cur, test_schema_name, test_table_name)
            record_pk = 2
            our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            updated_data = {
                "OUR TS TZ": our_ts_tz,
                "our_double": decimal.Decimal("6.6"),
                "our_money": "$0.00"
            }
            self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(
                our_ts_tz)
            self.expected_records[1]["our_double"] = decimal.Decimal("6.6")
            self.expected_records[1]["our_money"] = "$0.00"
            db_utils.update_record(cur, canon_table_name, record_pk,
                                   updated_data)

            # delete a newly inserted record with a higher replication key than the previous bookmark
            record_pk = 5
            db_utils.delete_record(cur, canon_table_name, record_pk)

    # run sync job 2 and verify exit codes
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # grab records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(),
        self.expected_primary_keys())
    records_by_stream = runner.get_records_from_target_output()
    messages = records_by_stream[test_table_name]['messages']

    # verify the expected number of records were synced
    self.assertEqual(3, record_count_by_stream[test_table_name])

    # verify the message actions match expectations
    self.assertEqual('activate_version', messages[0]['action'])
    self.assertEqual('upsert', messages[1]['action'])
    self.assertEqual('upsert', messages[2]['action'])
    self.assertEqual('upsert', messages[3]['action'])

    # verify the persisted schema matches expectations
    self.assertEqual(expected_schemas[test_table_name],
                     records_by_stream[test_table_name]['schema'])

    # verify replicated records meet our expectations...
    # verify the first record was the bookmarked record from the previous sync
    self.assertDictEqual(self.expected_records[2], messages[1]['data'])

    # verify the expected updated record with a higher replication-key value was replicated
    self.assertDictEqual(self.expected_records[0], messages[2]['data'])

    # verify the expected inserted record with a lower replication-key value was NOT replicated
    actual_record_ids = [message['data']['id'] for message in messages[1:]]
    expected_record_id = self.expected_records[3]['id']
    self.assertNotIn(expected_record_id, actual_record_ids)

    # verify the deleted record with a lower replication-key value was NOT replicated
    expected_record_id = self.expected_records[4]['id']
    self.assertNotIn(expected_record_id, actual_record_ids)

    # verify the expected updated record with a lower replication-key value was NOT replicated
    expected_record_id = self.expected_records[1]['id']
    self.assertNotIn(expected_record_id, actual_record_ids)

    # verify the expected inserted record with a higher replication-key value was replicated
    self.assertDictEqual(self.expected_records[5], messages[3]['data'])

    # verify records are in ascending order by replication-key value
    self.assertLess(messages[1]['data'][expected_replication_key],
                    messages[2]['data'][expected_replication_key])
    self.assertLess(messages[2]['data'][expected_replication_key],
                    messages[3]['data'][expected_replication_key])
    print("records are correct")

    # get bookmarked state
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_incremental_replication_test']

    # verify the bookmarked state matches our expectations
    self.assertIsNone(bookmark.get('lsn'))
    self.assertEqual(bookmark['version'], table_version)
    self.assertEqual(bookmark['replication_key'], expected_replication_key)
    self.assertEqual(bookmark['replication_key_value'],
                     self.expected_records[5][expected_replication_key])

    #---------------------------------------------------------------------
    # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
    #----------------------------------------------------------------------

    # Delete a pre-existing record from the database
    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # delete a record with a lower replication key than the previous sync
            record_pk = 1
            db_utils.delete_record(cur, canon_table_name, record_pk)

    # run sync job 3 and verify exit codes
    sync_job_name = runner.run_sync_mode(self, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

    # get records
    record_count_by_stream = runner.examine_target_output_file(
        self, conn_id, self.expected_sync_streams(),
        self.expected_primary_keys())
    records_by_stream = runner.get_records_from_target_output()
    messages = records_by_stream[test_table_name]['messages']

    # verify the expected number of records were replicated
    self.assertEqual(1, record_count_by_stream[test_table_name])

    # verify messages match our expectations
    self.assertEqual(2, len(messages))
    self.assertEqual(messages[0]['action'], 'activate_version')
    self.assertEqual(messages[1]['action'], 'upsert')
    self.assertEqual(records_by_stream[test_table_name]['table_version'],
                     table_version)

    # verify replicated records meet our expectations...
    # verify we did not re-replicate the deleted record
    actual_record_ids = [message['data']['id'] for message in messages[1:]]
    expected_record_id = self.expected_records[0]['id']
    self.assertNotIn(expected_record_id, actual_record_ids)

    # verify only the previously bookmarked record was synced
    self.assertDictEqual(self.expected_records[5], messages[1]['data'])
    print("records are correct")

    # get bookmarked state
    state = menagerie.get_state(conn_id)
    bookmark = state['bookmarks'][
        'dev-public-postgres_incremental_replication_test']

    # verify the bookmarked state matches our expectations
    self.assertIsNone(bookmark.get('lsn'))
    self.assertEqual(bookmark['version'], table_version)
    self.assertEqual(bookmark['replication_key'], expected_replication_key)
    self.assertEqual(bookmark['replication_key_value'],
                     self.expected_records[5][expected_replication_key])
def setUp(self):
    """Create a fresh test table in the 'dev' database and seed it with
    three fixture rows.

    Tracks two parallel lists:
      * self.inserted_records -- raw values handed to the INSERTs (keys for
        the mixed-case columns are built with quote_ident).
      * self.expected_records -- the records the tap is expected to emit
        for those rows after type coercion.
    """
    db_utils.ensure_environment_variables_set()
    db_utils.ensure_db("dev")
    self.maxDiff = None  # show full diffs on assertion failure
    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            canon_table_name = db_utils.canonicalized_table_name(
                cur, test_schema_name, test_table_name)
            # drop/recreate so every run starts from a known-empty table
            cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name,
                                              test_table_name)

            # NUMERIC precision/scale are parameterized via module constants
            create_table_sql = """
                CREATE TABLE {} (id SERIAL PRIMARY KEY,
                    our_varchar VARCHAR,
                    our_varchar_10 VARCHAR(10),
                    our_text TEXT,
                    our_integer INTEGER,
                    our_smallint SMALLINT,
                    our_bigint BIGINT,
                    our_decimal NUMERIC({},{}),
                    "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
                    "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
                    "OUR TIME" TIME WITHOUT TIME ZONE,
                    "OUR TIME TZ" TIME WITH TIME ZONE,
                    "OUR DATE" DATE,
                    our_double DOUBLE PRECISION,
                    our_real REAL,
                    our_boolean BOOLEAN,
                    our_bit BIT(1),
                    our_json JSON,
                    our_jsonb JSONB,
                    our_uuid UUID,
                    our_store HSTORE,
                    our_citext CITEXT,
                    our_inet inet,
                    our_cidr cidr,
                    our_mac macaddr,
                    our_alignment_enum ALIGNMENT,
                    our_money money)
            """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE)
            cur.execute(create_table_sql)

            # insert fixture data and track expected records
            self.inserted_records = []
            self.expected_records = []

            # record 1: every column populated
            our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1998, 3, 4)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar",
                'our_varchar_10': "varchar_10",
                'our_text': "some text",
                'our_integer': 44100,
                'our_smallint': 1,
                'our_bigint': 1000000,
                'our_decimal': decimal.Decimal('.01'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': 1.2,
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps({'secret': 55}),
                'our_jsonb': json.dumps({'burgers': 'good'}),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'maGICKal 4',
                'our_cidr': '192.168.100.128/25',
                'our_inet': '192.168.100.128/24',
                'our_mac': '08:00:2b:01:02:03',
                'our_alignment_enum': 'good',
                'our_money': '100.1122',
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('.01'),
                'our_text': 'some text',
                'our_bit': False,
                'our_integer': 44100,
                'our_double': decimal.Decimal('1.1'),
                'id': 1,
                'our_json': '{"secret": 55}',
                'our_boolean': True,
                'our_jsonb': '{"burgers": "good"}',
                'our_bigint': 1000000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1998-03-04T00:00:00+00:00',
                'our_varchar': 'our_varchar',
                'our_uuid': self.inserted_records[0]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_10',
                'our_citext': self.inserted_records[0]['our_citext'],
                'our_inet': self.inserted_records[0]['our_inet'],
                'our_cidr': self.inserted_records[0]['our_cidr'],
                'our_mac': self.inserted_records[0]['our_mac'],
                'our_alignment_enum': self.inserted_records[0]['our_alignment_enum'],
                # money is normalized by Postgres to 2 decimal places
                'our_money': '$100.11'
            })

            # record 2: omits our_alignment_enum; our_money explicitly NULL
            our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(10, 9, 8)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1964, 7, 1)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('.02'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '1',
                'our_json': json.dumps(["nymn 77"]),
                'our_jsonb': json.dumps({'burgers': 'good++'}),
                'our_uuid': my_uuid,
                'our_store': 'dances=>"floor",name=>"betty"',
                'our_citext': 'maGICKal 2',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
                'our_money': None
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('.02'),
                'OUR TIME': str(our_time),
                'our_text': 'some text 2',
                'our_bit': True,
                'our_integer': 44101,
                'our_double': decimal.Decimal('1.1'),
                'id': 2,
                'our_json': '["nymn 77"]',
                'our_boolean': True,
                'our_jsonb': '{"burgers": "good++"}',
                'our_bigint': 1000001,
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "dances": "floor"
                },
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'our_smallint': 2,
                'OUR DATE': '1964-07-01T00:00:00+00:00',
                'our_varchar': 'our_varchar 2',
                'OUR TS': self.expected_ts(our_ts),
                'our_uuid': self.inserted_records[1]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_10',
                'our_citext': self.inserted_records[1]['our_citext'],
                'our_inet': self.inserted_records[1]['our_inet'],
                'our_cidr': self.inserted_records[1]['our_cidr'],
                'our_mac': self.inserted_records[1]['our_mac'],
                'our_alignment_enum': None,
                'our_money': None
            })

            # record 3: only non-finite numerics set; everything else NULL
            self.inserted_records.append({
                'our_decimal': decimal.Decimal('NaN'),
                'our_double': float('nan'),
                'our_real': float('-inf')
            })
            self.expected_records.append({
                'id': 3,
                # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support
                # them and now we are at least consistent(ly wrong).
                'our_decimal': None,
                'our_double': None,
                'our_real': None,
                # any field without a set value will be set to NULL
                'OUR TIME': None,
                'our_text': None,
                'our_bit': None,
                'our_integer': None,
                'our_json': None,
                'our_boolean': None,
                'our_jsonb': None,
                'our_bigint': None,
                'OUR TIME TZ': None,
                'our_store': None,
                'OUR TS TZ': None,
                'our_smallint': None,
                'OUR DATE': None,
                'our_varchar': None,
                'OUR TS': None,
                'our_uuid': None,
                'our_varchar_10': None,
                'our_citext': None,
                'our_inet': None,
                'our_cidr': None,
                'our_mac': None,
                'our_alignment_enum': None,
                'our_money': None
            })

            for record in self.inserted_records:
                db_utils.insert_record(cur, test_table_name, record)
def test_identifier(self):
    """quote_ident wraps an identifier in double quotes and doubles any
    embedded double-quote characters."""
    cases = [
        ("blah-blah", '"blah-blah"'),
        ('quote"inside', '"quote""inside"'),
    ]
    for raw, expected in cases:
        self.assertEqual(quote_ident(raw, self.conn), expected)
def store_data(dbcon, df_data, tablename, ignore_previous, l_date=None,
               r_date=None, **kwargs):
    """Persist ``df_data`` into ``tablename`` and report what was stored.

    When ``ignore_previous`` is true, the frame is bulk-copied as-is and the
    return value maps the ``td`` label from ``kwargs`` to the stored frame
    (empty dict when the frame is empty).

    Otherwise, rows already present with ``date_received`` in
    [``l_date``, ``r_date``) are read back and diffed against ``df_data``;
    updated, deleted (tombstoned: all non-id columns nulled, update_stamp
    set to now) and added rows are stored via recursive calls, and the
    combined ``{label: frame}`` dict is returned.
    """
    with dbcon.cursor() as dbcur:
        if ignore_previous:
            type_of_data = kwargs.get('td', '')
            # lazy %-args: no formatting cost when DEBUG is disabled
            logging.debug('storing %s elements:\n%s\n',
                          type_of_data, df_data.head())
            if not df_data.empty:
                psql_insert_copy(dbcon, df_data, tablename)
                return {type_of_data: df_data}
            return dict()

        # The identifier is safely quoted with quote_ident; the date bounds
        # are passed as bound parameters instead of being interpolated into
        # the SQL text, which avoids SQL injection and quoting bugs.
        existing_data = psql.read_sql(
            '''
            SELECT * FROM {}
            WHERE date_received >= %(l_date)s::date
              AND date_received < %(r_date)s::date
            '''.format(quote_ident(tablename, dbcur)),
            dbcon, params={'l_date': l_date, 'r_date': r_date})

        new_data = df_data.drop('update_stamp', axis=1)
        old_data = existing_data.drop('update_stamp', axis=1)

        new_ids = set(new_data._id)
        old_ids = set(old_data._id)
        common_ids = new_ids & old_ids
        logging.info('Number of new ids, old ids, common ids:\n%s',
                     (len(new_ids), len(old_ids), len(common_ids)))

        # A common id whose full row content changed counts as updated.
        updated_data = new_data[
            ~new_data.apply(tuple, 1).isin(old_data.apply(tuple, 1))
            & new_data['_id'].isin(common_ids)]
        updated_ids = set(updated_data._id)
        deleted_ids = old_ids - new_ids
        added_ids = new_ids - old_ids

        # log at most 50 ids per category, newest first
        logging.info('updated_ids (total %s elements):\n%s', len(updated_ids),
                     sorted(updated_ids, reverse=True)[:min(len(updated_ids), 50)])
        logging.info('added_ids (total %s elements):\n%s', len(added_ids),
                     sorted(added_ids, reverse=True)[:min(len(added_ids), 50)])
        logging.info('deleted_ids (total %s elements):\n%s', len(deleted_ids),
                     sorted(deleted_ids, reverse=True)[:min(len(deleted_ids), 50)])

        # rows already tombstoned (date_received NULL) must not be re-deleted
        already_deleted = psql.read_sql(
            '''
            SELECT _id FROM {}
            WHERE date_received IS null
            '''.format(quote_ident(tablename, dbcur)),
            dbcon)

        updated_rows = df_data.loc[df_data['_id'].isin(updated_ids)]

        # Tombstone deleted rows: keep only the id columns, null the rest,
        # stamp the deletion time.
        deleted_rows = existing_data.loc[
            existing_data['_id'].isin(deleted_ids)
            & ~existing_data['_id'].isin(already_deleted['_id'])].copy()
        deleted_rows = deleted_rows.assign(**{
            col: None
            for col in deleted_rows
            if col not in ['_id', 'complaint_id', 'update_stamp']
        })
        deleted_rows['update_stamp'] = dt.datetime.now()
        deleted_rows = deleted_rows.drop_duplicates(ignore_index=True)

        added_rows = df_data.loc[df_data['_id'].isin(added_ids)]

        stored_data = {}
        stored_data.update(
            store_data(dbcon, updated_rows, tablename,
                       ignore_previous=True, td='updated'))
        stored_data.update(
            store_data(dbcon, deleted_rows, tablename,
                       ignore_previous=True, td='deleted'))
        stored_data.update(
            store_data(dbcon, added_rows, tablename,
                       ignore_previous=True, td='added'))
        return stored_data
def setUp(self):
    """Create a fresh wide test table (including `invalid_*` columns the tap
    cannot replicate) in ``test_db`` and bulk-insert 500 identical fixture
    rows, recorded in ``self.recs``.
    """
    db_utils.ensure_environment_variables_set()
    db_utils.ensure_db(test_db)
    self.maxDiff = None
    with db_utils.get_test_connection(test_db) as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # db_utils.ensure_replication_slot(cur, test_db)
            canonicalized_table_name = db_utils.canonicalized_table_name(
                cur, test_schema_name, test_table_name)
            create_table_sql = """
                CREATE TABLE {} (id SERIAL PRIMARY KEY,
                    our_varchar VARCHAR,
                    our_varchar_10 VARCHAR(10),
                    our_text TEXT,
                    our_text_2 TEXT,
                    our_integer INTEGER,
                    our_smallint SMALLINT,
                    our_bigint BIGINT,
                    our_decimal NUMERIC(12,2),
                    "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
                    "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
                    "OUR TIME" TIME WITHOUT TIME ZONE,
                    "OUR TIME TZ" TIME WITH TIME ZONE,
                    "OUR DATE" DATE,
                    our_double DOUBLE PRECISION,
                    our_real REAL,
                    our_boolean BOOLEAN,
                    our_bit BIT(1),
                    our_json JSON,
                    our_jsonb JSONB,
                    our_uuid UUID,
                    our_store HSTORE,
                    our_citext CITEXT,
                    our_cidr cidr,
                    our_inet inet,
                    our_mac macaddr,
                    our_alignment_enum ALIGNMENT,
                    our_money money,
                    invalid_bigserial BIGSERIAL,
                    invalid_bit_varying BIT VARYING,
                    invalid_box BOX,
                    invalid_bytea BYTEA,
                    invalid_circle CIRCLE,
                    invalid_interval INTERVAL,
                    invalid_line LINE,
                    invalid_lseg LSEG,
                    invalid_path PATH,
                    invalid_pg_lsn PG_LSN,
                    invalid_point POINT,
                    invalid_polygon POLYGON,
                    invalid_serial SERIAL,
                    invalid_smallserial SMALLSERIAL,
                    invalid_tsquery TSQUERY,
                    invalid_tsvector TSVECTOR,
                    invalid_txid_snapshot TXID_SNAPSHOT,
                    invalid_xml XML)
            """.format(canonicalized_table_name)
            # drop/recreate so every run starts from a known-empty table
            cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name,
                                              test_table_name)
            cur.execute(create_table_sql)

            #insert fixture data 1
            # NOTE(review): these pre-loop values are immediately shadowed by
            # the assignments inside the loop below and appear unused here --
            # possibly a leftover from a sibling test's setUp; confirm.
            our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
            nyc_tz = pytz.timezone('America/New_York')
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + "-04:00"
            our_date = datetime.date(1998, 3, 4)
            my_uuid = str(uuid.uuid1())

            self.recs = []
            # insert 500 rows that differ only in their generated uuid
            for _ in range(500):
                our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184)
                nyc_tz = pytz.timezone('America/New_York')
                our_ts_tz = nyc_tz.localize(our_ts)
                our_time = datetime.time(10, 9, 8)
                our_time_tz = our_time.isoformat() + "-04:00"
                our_date = datetime.date(1964, 7, 1)
                my_uuid = str(uuid.uuid1())
                record = {
                    'our_varchar': "our_varchar 4",
                    'our_varchar_10': "varchar_10",
                    'our_text': "some text 2",
                    'our_text_2': "NOT SELECTED",
                    'our_integer': 44101,
                    'our_smallint': 2,
                    'our_bigint': 1000001,
                    'our_decimal': decimal.Decimal('9876543210.02'),
                    quote_ident('OUR TS', cur): our_ts,
                    quote_ident('OUR TS TZ', cur): our_ts_tz,
                    quote_ident('OUR TIME', cur): our_time,
                    quote_ident('OUR TIME TZ', cur): our_time_tz,
                    quote_ident('OUR DATE', cur): our_date,
                    'our_double': 1.1,
                    'our_real': 1.2,
                    'our_boolean': True,
                    'our_bit': '1',
                    'our_json': json.dumps({'nymn': 77}),
                    'our_jsonb': json.dumps({'burgers': 'good++'}),
                    'our_uuid': my_uuid,
                    'our_store': 'dances=>"floor",name=>"betty"',
                    'our_citext': 'maGICKal 2',
                    'our_cidr': '192.168.101.128/25',
                    'our_inet': '192.168.101.128/24',
                    'our_mac': '08:00:2b:01:02:04',
                }
                db_utils.insert_record(cur, test_table_name, record)
                self.recs.append(record)

            # refresh planner statistics after the bulk insert
            cur.execute("""ANALYZE {}""".format(canonicalized_table_name))
def setUp(self):
    """Create a fresh test table in the 'dev' database and seed it with
    three fixture rows.

    Tracks two parallel lists:
      * self.inserted_records -- raw values handed to the INSERTs (keys for
        the mixed-case columns are built with quote_ident).
      * self.expected_records -- the records the tap is expected to emit
        for those rows after type coercion.
    """
    db_utils.ensure_environment_variables_set()
    db_utils.ensure_db('dev')
    self.maxDiff = None
    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # drop/recreate so every run starts from a known-empty table
            cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name,
                                              test_table_name)
            create_table_sql = """
                CREATE TABLE {} (id SERIAL PRIMARY KEY,
                    our_varchar VARCHAR,
                    our_varchar_10 VARCHAR(10),
                    our_text TEXT,
                    our_integer INTEGER,
                    our_smallint SMALLINT,
                    our_bigint BIGINT,
                    our_decimal NUMERIC(12,2),
                    "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
                    "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
                    "OUR TIME" TIME WITHOUT TIME ZONE,
                    "OUR TIME TZ" TIME WITH TIME ZONE,
                    "OUR DATE" DATE,
                    our_double DOUBLE PRECISION,
                    our_real REAL,
                    our_boolean BOOLEAN,
                    our_bit BIT(1),
                    our_json JSON,
                    our_jsonb JSONB,
                    our_uuid UUID,
                    our_store HSTORE,
                    our_citext CITEXT,
                    our_inet inet,
                    our_cidr cidr,
                    our_mac macaddr,
                    our_money money)
            """.format(
                db_utils.canonicalized_table_name(cur, test_schema_name,
                                                  test_table_name))
            cur.execute(create_table_sql)

            # insert fixture data and track expected records
            self.inserted_records = []
            self.expected_records = []

            nyc_tz = pytz.timezone('America/New_York')
            our_time_offset = "-04:00"

            # record 1: our_money omitted (expected NULL)
            our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184)
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(10, 9, 8)
            our_time_tz = our_time.isoformat() + our_time_offset
            our_date = datetime.date(1964, 7, 1)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar 2",
                'our_varchar_10': "varchar_10",
                'our_text': "some text 2",
                'our_integer': 44101,
                'our_smallint': 2,
                'our_bigint': 1000001,
                'our_decimal': decimal.Decimal('9876543210.02'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '1',
                'our_json': json.dumps({'nymn': 77}),
                'our_jsonb': json.dumps({'burgers': 'good++'}),
                'our_uuid': my_uuid,
                'our_citext': 'cyclops 2',
                'our_store': 'dances=>"floor",name=>"betty"',
                'our_cidr': '192.168.101.128/25',
                'our_inet': '192.168.101.128/24',
                'our_mac': '08:00:2b:01:02:04',
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('9876543210.02'),
                'OUR TIME': str(our_time),
                'our_text': 'some text 2',
                'our_bit': True,
                'our_integer': 44101,
                'our_double': decimal.Decimal('1.1'),
                'id': 1,
                'our_json': '{"nymn": 77}',
                'our_boolean': True,
                'our_jsonb': '{"burgers": "good++"}',
                'our_bigint': 1000001,
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "dances": "floor"
                },
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'our_smallint': 2,
                'OUR DATE': '1964-07-01T00:00:00+00:00',
                'our_varchar': 'our_varchar 2',
                'OUR TS': self.expected_ts(our_ts),
                'our_uuid': self.inserted_records[0]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_10',
                'our_citext': self.inserted_records[0]['our_citext'],
                'our_inet': self.inserted_records[0]['our_inet'],
                'our_cidr': self.inserted_records[0]['our_cidr'],
                'our_mac': self.inserted_records[0]['our_mac'],
                'our_money': None
            })

            # record 2: scalar jsonb and a money value (normalized to 2 dp)
            our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184)
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + our_time_offset
            our_date = datetime.date(1998, 3, 4)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar",
                'our_varchar_10': "varchar_10",
                'our_text': "some text",
                'our_integer': 44100,
                'our_smallint': 1,
                'our_bigint': 1000000,
                'our_decimal': decimal.Decimal('1234567890.01'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': decimal.Decimal('1.1'),
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps({'secret': 55}),
                'our_jsonb': json.dumps(6777777),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 1',
                'our_cidr': '192.168.100.128/25',
                'our_inet': '192.168.100.128/24',
                'our_mac': '08:00:2b:01:02:03',
                'our_money': '$1,445.5678'
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('1234567890.01'),
                'our_text': 'some text',
                'our_bit': False,
                'our_integer': 44100,
                'our_double': decimal.Decimal('1.1'),
                'id': 2,
                'our_json': '{"secret": 55}',
                'our_boolean': True,
                'our_jsonb': self.inserted_records[1]['our_jsonb'],
                'our_bigint': 1000000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1998-03-04T00:00:00+00:00',
                'our_varchar': 'our_varchar',
                'our_uuid': self.inserted_records[1]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_10',
                'our_citext': self.inserted_records[1]['our_citext'],
                'our_inet': self.inserted_records[1]['our_inet'],
                'our_cidr': self.inserted_records[1]['our_cidr'],
                'our_mac': self.inserted_records[1]['our_mac'],
                'our_money': '$1,445.57'
            })

            # record 3: like record 2 but with our_double given as a string
            our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
            our_ts_tz = nyc_tz.localize(our_ts)
            our_time = datetime.time(12, 11, 10)
            our_time_tz = our_time.isoformat() + our_time_offset
            our_date = datetime.date(1998, 3, 4)
            my_uuid = str(uuid.uuid1())
            self.inserted_records.append({
                'our_varchar': "our_varchar",
                'our_varchar_10': "varchar_10",
                'our_text': "some text",
                'our_integer': 44100,
                'our_smallint': 1,
                'our_bigint': 1000000,
                'our_decimal': decimal.Decimal('1234567890.01'),
                quote_ident('OUR TS', cur): our_ts,
                quote_ident('OUR TS TZ', cur): our_ts_tz,
                quote_ident('OUR TIME', cur): our_time,
                quote_ident('OUR TIME TZ', cur): our_time_tz,
                quote_ident('OUR DATE', cur): our_date,
                'our_double': '1.1',
                'our_real': decimal.Decimal('1.2'),
                'our_boolean': True,
                'our_bit': '0',
                'our_json': json.dumps({'secret': 55}),
                'our_jsonb': json.dumps(6777777),
                'our_uuid': my_uuid,
                'our_store': 'size=>"small",name=>"betty"',
                'our_citext': 'cyclops 1',
                'our_cidr': '192.168.100.128/25',
                'our_inet': '192.168.100.128/24',
                'our_mac': '08:00:2b:01:02:03',
                'our_money': '$1,445.5678'
            })
            self.expected_records.append({
                'our_decimal': decimal.Decimal('1234567890.01'),
                'our_text': 'some text',
                'our_bit': False,
                'our_integer': 44100,
                'our_double': decimal.Decimal('1.1'),
                'id': 3,
                'our_json': '{"secret": 55}',
                'our_boolean': True,
                # NOTE(review): index [1] (record 2's jsonb) -- the values are
                # identical so behavior matches, but [2] may have been intended
                'our_jsonb': self.inserted_records[1]['our_jsonb'],
                'our_bigint': 1000000,
                'OUR TS': self.expected_ts(our_ts),
                'OUR TS TZ': self.expected_ts_tz(our_ts_tz),
                'OUR TIME': str(our_time),
                'OUR TIME TZ': str(our_time_tz),
                'our_store': {
                    "name": "betty",
                    "size": "small"
                },
                'our_smallint': 1,
                'OUR DATE': '1998-03-04T00:00:00+00:00',
                'our_varchar': 'our_varchar',
                'our_uuid': self.inserted_records[2]['our_uuid'],
                'our_real': decimal.Decimal('1.2'),
                'our_varchar_10': 'varchar_10',
                'our_citext': self.inserted_records[2]['our_citext'],
                'our_inet': self.inserted_records[2]['our_inet'],
                'our_cidr': self.inserted_records[2]['our_cidr'],
                'our_mac': self.inserted_records[2]['our_mac'],
                'our_money': '$1,445.57'
            })

            for rec in self.inserted_records:
                db_utils.insert_record(cur, test_table_name, rec)