def drop_schema(context, schema_name, dbname):
    SQL = 'drop schema %s cascade' % schema_name
    with dbconn.connect(dbconn.DbURL(dbname=dbname)) as conn:
        dbconn.execSQL(conn, SQL)
        conn.commit()
    if check_schema_exists(context, schema_name, dbname):
        raise Exception('Unable to successfully drop the schema %s' % schema_name)

def create_int_table(context, table_name, table_type='heap', dbname='testdb'):
    CREATE_TABLE_SQL = None
    NROW = 1000

    table_type = table_type.upper()
    if table_type == 'AO':
        CREATE_TABLE_SQL = 'create table %s WITH(APPENDONLY=TRUE) as select generate_series(1,%d) as c1' % (
            table_name, NROW)
    elif table_type == 'CO':
        CREATE_TABLE_SQL = 'create table %s WITH(APPENDONLY=TRUE, orientation=column) as select generate_series(1, %d) as c1' % (
            table_name, NROW)
    elif table_type == 'HEAP':
        CREATE_TABLE_SQL = 'create table %s as select generate_series(1, %d) as c1' % (table_name, NROW)

    if CREATE_TABLE_SQL is None:
        raise Exception('Invalid table type specified')

    SELECT_TABLE_SQL = 'select count(*) from %s' % table_name
    with dbconn.connect(dbconn.DbURL(dbname=dbname)) as conn:
        dbconn.execSQL(conn, CREATE_TABLE_SQL)
        conn.commit()

        result = dbconn.execSQLForSingleton(conn, SELECT_TABLE_SQL)
        if result != NROW:
            raise Exception('Integer table creation was not successful. Expected %d does not match %d' % (NROW, result))

def execute(self):
    dburl = dbconn.DbURL()
    query = self.INSERT_VERIFICATION_ENTRY % (self.token, self.type, self.content, VerificationState.RUNNING)
    with dbconn.connect(dburl, allowSystemTableMods='dml') as conn:
        dbconn.execSQL(conn, query)
        conn.commit()

def wrapper(*args, **kwargs):
    dbconn.execSQL(ctx.conn, '''
    DROP TABLE IF EXISTS {name};
    '''.format(name=name))
    ctx.conn.commit()
    return func(*args, **kwargs)

def execute(self):
    dburl = dbconn.DbURL()
    query = self.UPDATE_VERIFICATION_ENTRY % (self.state, self.done, self.mismatch, self.token)
    with dbconn.connect(dburl, allowSystemTableMods='dml') as conn:
        dbconn.execSQL(conn, query)
        conn.commit()

def wrapper(*args, **kwargs):
    dbconn.execSQL(ctx.conn, '''
    DROP SCHEMA IF EXISTS gpexpand CASCADE;
    ''')
    ctx.conn.commit()
    return func(*args, **kwargs)

def insert_status(status):
    ctx.day += 1
    dbconn.execSQL(ctx.conn, '''
    INSERT INTO gpexpand.status VALUES
      ( '{status}', date '2001-01-01' + interval '{day} day');
    '''.format(status=status, day=ctx.day))
    ctx.conn.commit()

def create_partition(context, tablename, storage_type, dbname, compression_type=None, partition=True,
                     rowcount=1094, with_data=True, host=None, port=0, user=None):
    interval = '1 year'

    table_definition = 'Column1 int, Column2 varchar(20), Column3 date'
    create_table_str = "Create table " + tablename + "(" + table_definition + ")"
    storage_type_dict = {'ao': 'row', 'co': 'column'}

    part_table = " Distributed Randomly Partition by list(Column2) \
                    Subpartition by range(Column3) Subpartition Template \
                    (start (date '%s') end (date '%s') every (interval '%s')) \
                    (Partition p1 values('backup') , Partition p2 values('restore')) " \
                 % (PARTITION_START_DATE, PARTITION_END_DATE, interval)

    if storage_type == "heap":
        create_table_str = create_table_str
        if partition:
            create_table_str = create_table_str + part_table

    elif storage_type == "ao" or storage_type == "co":
        create_table_str = create_table_str + " WITH(appendonly = true, orientation = %s) " % storage_type_dict[
            storage_type]
        if compression_type is not None:
            create_table_str = create_table_str[:-2] + ", compresstype = " + compression_type + ") "
        if partition:
            create_table_str = create_table_str + part_table

    create_table_str = create_table_str + ";"

    with dbconn.connect(dbconn.DbURL(hostname=host, port=port, username=user, dbname=dbname)) as conn:
        dbconn.execSQL(conn, create_table_str)
        conn.commit()

    if with_data:
        populate_partition(tablename, PARTITION_START_DATE, dbname, 0, rowcount, host, port, user)

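# Worked trace (illustrative only; the call below is hypothetical and not part of
# the source): for
#     create_partition(context, 'sales', 'co', 'testdb', compression_type='zlib', partition=False)
# the string building above proceeds as
#     "Create table sales(Column1 int, Column2 varchar(20), Column3 date)"
#     + " WITH(appendonly = true, orientation = column) "      # storage_type_dict['co']
# then the [:-2] slice drops the trailing ") " so compresstype can be spliced in,
# giving "... orientation = column, compresstype = zlib) ", and the statement
# finally executed is
#     Create table sales(Column1 int, Column2 varchar(20), Column3 date) WITH(appendonly = true, orientation = column, compresstype = zlib) ;
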
def impl(context, tablename, schemaname):
    if not check_schema_exists(context, schemaname, context.dbname):
        raise Exception("Schema %s does not exist in database %s" % (schemaname, context.dbname))
    drop_table_if_exists(context, '.'.join([schemaname, tablename]), context.dbname)
    dbconn.execSQL(context.conn, CREATE_MULTI_PARTITION_TABLE_SQL % (schemaname, tablename))
    context.conn.commit()
    check_table_exists(context, context.dbname, '.'.join([schemaname, tablename]), table_type='ao')

def drop_table(context, table_name, dbname, host=None, port=0, user=None):
    SQL = 'drop table %s' % table_name
    with dbconn.connect(dbconn.DbURL(hostname=host, username=user, port=port, dbname=dbname)) as conn:
        dbconn.execSQL(conn, SQL)
        conn.commit()
    if check_table_exists(context, table_name=table_name, dbname=dbname, host=host, port=port, user=user):
        raise Exception('Unable to successfully drop the table %s' % table_name)

def wrapper(*args, **kwargs):
    dbconn.execSQL(ctx.conn, '''
    UPDATE gpexpand.status_detail SET STATUS='COMPLETED'
     WHERE fq_name='{name}';
    '''.format(name=name))
    ctx.conn.commit()
    return func(*args, **kwargs)

def updateNnHost(conn, new_host):
    'update the LOCATION field for each record in pg_exttable'
    dbconn.execSQL(conn, "set allow_system_table_mods = 'DML'")
    dbconn.execSQL(conn, "START TRANSACTION")
    cursor = dbconn.execSQL(conn,
                            "SELECT location, reloid, relname FROM pg_exttable, pg_class WHERE reloid = relfilenode")
    for row in cursor:
        updateOneRecord(conn, new_host, row)
    conn.commit()

def run(self):
    while check_pg_class_lock(self.dbname) != 1:
        pass
    with dbconn.connect(dbconn.DbURL(dbname=self.dbname)) as conn:
        dbconn.execSQL(conn, self.query)
    self.result = 0
    self.completed = True
    self.halt = False

def create_indexes(context, table_name, indexname, dbname):
    btree_index_sql = "create index btree_%s on %s using btree(column1);" % (indexname, table_name)
    bitmap_index_sql = "create index bitmap_%s on %s using bitmap(column3);" % (indexname, table_name)
    index_sql = btree_index_sql + bitmap_index_sql
    with dbconn.connect(dbconn.DbURL(dbname=dbname)) as conn:
        dbconn.execSQL(conn, index_sql)
        conn.commit()
    validate_index(context, table_name, dbname)

def populate_partition(tablename, start_date, dbname, data_offset, rowcount=1094, host=None, port=0, user=None):
    insert_sql_str = "insert into %s select i+%d, 'backup', i + date '%s' from generate_series(0,%d) as i" % (
        tablename, data_offset, start_date, rowcount)
    insert_sql_str += "; insert into %s select i+%d, 'restore', i + date '%s' from generate_series(0,%d) as i" % (
        tablename, data_offset, start_date, rowcount)

    with dbconn.connect(dbconn.DbURL(hostname=host, port=port, username=user, dbname=dbname)) as conn:
        dbconn.execSQL(conn, insert_sql_str)
        conn.commit()

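# Note on the generated SQL (illustrative; the call below is hypothetical): for
#     populate_partition('sales', '2010-01-01', 'testdb', 0, rowcount=2)
# two INSERT ... SELECT statements are run, e.g.
#     insert into sales select i+0, 'backup', i + date '2010-01-01' from generate_series(0,2) as i
# followed by the same statement with 'restore'. Since generate_series(0, rowcount)
# is inclusive on both ends, each branch inserts rowcount + 1 rows.
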
def _analyze(self, restore_db, master_port):
    conn = None
    logger.info('Commencing analyze of %s database, please wait' % restore_db)
    try:
        dburl = dbconn.DbURL(port=master_port, dbname=restore_db)
        conn = dbconn.connect(dburl)
        execSQL(conn, 'analyze')
        conn.commit()
    except Exception, e:
        logger.warn('Issue with analyze of %s database' % restore_db)

def pauseFaultProber(self):
    assert not self.__isPaused
    assert self.__masterDbUrl is not None  # must be initialized
    assert self.__conn is None

    logger.debug("Pausing fault prober")

    # use utility mode so we don't do any segment connection stuff
    self.__conn = dbconn.connect(self.__masterDbUrl, True)
    dbconn.execSQL(self.__conn, "set gp_fts_probe_pause = on")

    self.__isPaused = True

def updateOneRecord(conn, new_host, row):
    "Updates the LOCATION field of one record"
    if not (isPxfTable(row[0])):
        return
    new_location = makeNewLocation(new_host, row[0])
    dbconn.execSQL(conn, "UPDATE pg_exttable SET location = '" + new_location +
                   "' WHERE reloid = " + str(row[1]))
    print "Updated LOCATION for table ", row[2], "oid: ", row[1], \
        "\n Old LOCATION: ", row[0], \
        "\n New LOCATION: ", new_location

def execute(dburl, query):
    rows = [[]]
    try:
        conn = dbconn.connect(dburl)
        curs = dbconn.execSQL(conn, 'set enforce_virtual_segment_number = 16')
        curs = dbconn.execSQL(conn, query)
        rows = curs.fetchall()
        conn.commit()
        conn.close()
    except DatabaseError, ex:
        logger.error('Failed to execute the statement on the database. Please, check log file for errors.')
        logger.error(ex)
        sys.exit(3)

def create_external_partition(context, tablename, dbname, port, filename):
    table_definition = 'Column1 int, Column2 varchar(20), Column3 date'
    create_table_str = "Create table %s (%s) Distributed randomly \
                        Partition by range(Column3) ( \
                        partition p_1 start(date '2010-01-01') end(date '2011-01-01') with (appendonly=true, orientation=column, compresstype=zlib, compresslevel=1), \
                        partition p_2 start(date '2011-01-01') end(date '2012-01-01') with (appendonly=true, orientation=row, compresstype=zlib, compresslevel=1), \
                        partition s_3 start(date '2012-01-01') end(date '2013-01-01') with (appendonly=true, orientation=column), \
                        partition s_4 start(date '2013-01-01') end(date '2014-01-01') with (appendonly=true, orientation=row), \
                        partition s_5 start(date '2014-01-01') end(date '2015-01-01') ) \
                        ;" % (tablename, table_definition)

    master_hostname = get_master_hostname()
    create_ext_table_str = "Create readable external table %s_ret (%s) \
                            location ('gpfdist://%s:%s/%s') \
                            format 'csv' encoding 'utf-8' \
                            log errors segment reject limit 1000 \
                            ;" % (tablename, table_definition, master_hostname[0][0].strip(), port, filename)

    alter_table_str = "Alter table %s exchange partition p_2 \
                       with table %s_ret without validation \
                       ;" % (tablename, tablename)

    drop_table_str = "Drop table %s_ret;" % (tablename)

    with dbconn.connect(dbconn.DbURL(dbname=dbname)) as conn:
        dbconn.execSQL(conn, create_table_str)
        dbconn.execSQL(conn, create_ext_table_str)
        dbconn.execSQL(conn, alter_table_str)
        dbconn.execSQL(conn, drop_table_str)
        conn.commit()

    populate_partition(tablename, '2010-01-01', dbname, 0, 100)

def reload_tables(self):
    with dbconn.connect(dbconn.DbURL(dbname=self.database, port=self.port)) as conn:
        conn.commit()  # Commit the implicit transaction started by connect
        for schema_name, table_name, sort_column_list in self.table_list:
            logger.info('Starting reload for table {schema}.{table}'.format(schema=schema_name, table=table_name))
            logger.info('Table {schema}.{table} has {rows} rows and {size} size'
                        .format(schema=schema_name, table=table_name,
                                rows=self.get_row_count('%s.%s' % (schema_name, table_name)),
                                size=self.get_table_size(schema_name, table_name)))
            if not self.check_indexes(schema_name, table_name):
                logger.info('Skipping reload for {schema}.{table}'.format(schema=schema_name, table=table_name))
                continue
            start = time.time()
            dbconn.execSQL(conn, 'BEGIN')
            dbconn.execSQL(conn, """CREATE TEMP TABLE temp_{table} AS SELECT * FROM {schema}.{table}"""
                           .format(schema=schema_name, table=table_name))
            temp_row_count = dbconn.execSQLForSingleton(conn, """SELECT count(*) FROM temp_{table}"""
                                                        .format(table=table_name))
            table_row_count = dbconn.execSQLForSingleton(conn, """SELECT count(*) from {schema}.{table}"""
                                                         .format(table=table_name, schema=schema_name))
            if temp_row_count != table_row_count:
                raise Exception('Row count for temp table(%s) does not match(%s)' % (temp_row_count, table_row_count))
            dbconn.execSQL(conn, 'TRUNCATE TABLE {schema}.{table}'.format(schema=schema_name, table=table_name))
            sort_order = ['%s %s' % (c[0], c[1]) for c in sort_column_list]
            parent_schema_name, parent_table_name = self.parent_partition_map[(schema_name, table_name)]
            dbconn.execSQL(conn, """INSERT INTO {parent_schema}.{parent_table} SELECT * FROM temp_{table} ORDER BY {column_list}"""
                           .format(parent_schema=parent_schema_name, parent_table=parent_table_name,
                                   table=table_name, column_list=','.join(sort_order)))
            conn.commit()
            end = time.time()
            logger.info('Finished reload for table {schema}.{table} in time {sec} seconds'
                        .format(schema=schema_name, table=table_name, sec=(end - start)))

def run(self):
    logger.debug('Executing query (%s:%s) for segment (%s:%s) on database (%s)' % (
        self.qname, self.query, self.hostname, self.port, self.dbname))
    with dbconn.connect(dbconn.DbURL(hostname=self.hostname, port=self.port, dbname=self.dbname),
                        utility=True) as conn:
        res = dbconn.execSQL(conn, self.query)
        self.res = res.fetchall()

def get_results(table_lst, dburl, statement_mem):
    try:
        conn = dbconn.connect(dburl)
        dbconn.execSQL(conn, "SET statement_mem TO \'" + statement_mem + "\';")
        conn.commit()
        for table in table_lst:
            query = ("select count(*) from " + table + ";")
            curs = dbconn.execSQL(conn, query)
            row_cnt = curs.fetchall()[0][0]
            res_queue.put(table + "\t" + str(row_cnt))
        conn.commit()
        conn.close()
    except DatabaseError, ex:
        logger.error('Failed to execute the statement on the database. Please, check log file for errors.')
        logger.error(ex)
        sys.exit(3)

def test_xlogcleanup(self):
    """
    Verify that an xlog segment created while basebackup dumps out
    data does not get cleaned up.
    """
    shutil.rmtree('base', True)
    PSQL.run_sql_command('DROP table if exists foo')

    # Inject fault at post checkpoint create (basebackup)
    logger.info('Injecting base_backup_post_create_checkpoint fault ...')
    result = self.suspend_at('base_backup_post_create_checkpoint')
    logger.info(result.stdout)
    self.assertEqual(result.rc, 0, result.stdout)

    # Now execute basebackup. It will be blocked due to the
    # injected fault.
    logger.info('Perform basebackup with xlog & recovery.conf...')
    pg_basebackup = subprocess.Popen(['pg_basebackup', '-x', '-R', '-D', 'base'],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

    # Give basebackup a moment to reach the fault & trigger it
    logger.info('Check if suspend fault is hit ...')
    triggered = self.wait_triggered('base_backup_post_create_checkpoint')
    self.assertTrue(triggered, 'Fault was not triggered')

    # Perform operations that cause xlog seg generation
    logger.info('Performing xlog seg generation ...')
    count = 0
    while (count < 10):
        PSQL.run_sql_command('select pg_switch_xlog(); select pg_switch_xlog(); checkpoint;')
        count = count + 1

    # Resume basebackup
    result = self.resume('base_backup_post_create_checkpoint')
    logger.info(result.stdout)
    self.assertEqual(result.rc, 0, result.stdout)

    # Wait until basebackup ends
    logger.info('Waiting for basebackup to end ...')
    sql = "SELECT count(*) FROM pg_stat_replication"
    with dbconn.connect(dbconn.DbURL(), utility=True) as conn:
        while (True):
            curs = dbconn.execSQL(conn, sql)
            results = curs.fetchall()
            if (int(results[0][0]) == 0):
                break

    # Verify that basebackup completed successfully:
    # recovery.conf must exist in the backup directory.
    self.assertTrue(os.path.exists(os.path.join('base', 'recovery.conf')))
    logger.info('Found recovery.conf in the backup directory.')
    logger.info('Pass')

def count_wal_sender(self):
    sql = "SELECT count(*) FROM pg_stat_replication"
    with dbconn.connect(dbconn.DbURL()) as conn:
        curs = dbconn.execSQL(conn, sql)
        results = curs.fetchall()
    return results[0][0]

def get_all_parent_tables(dbname):
    SQL = "SELECT DISTINCT (schemaname || '.' || tablename) FROM pg_partitions"
    data = []
    with dbconn.connect(dbconn.DbURL(dbname=dbname)) as conn:
        curs = dbconn.execSQL(conn, SQL)
        data = curs.fetchall()
    return set([d[0] for d in data])

def sendPgElogFromMaster(self, msg, sendAlerts):
    """
    Send a message from the master database using select pg_elog ...
    """
    # ensure initializeProvider() was called
    checkNotNone("masterDbUrl", self.__masterDbUrl)

    conn = None
    try:
        conn = dbconn.connect(self.__masterDbUrl, utility=True)
        dbconn.execSQL(conn, "SELECT GP_ELOG(" +
                       self.__toSqlCharValue(msg) + "," +
                       ("true" if sendAlerts else "false") + ")")
    finally:
        if conn:
            conn.close()

def __fetchSingleOutputRow(self, conn, sql, retry=False):
    """
    Execute specified SQL command and return what we expect to be a single row.
    Raise an exception when more or fewer than one row is seen and when more
    than one row is seen display up to 10 rows as logger warnings.
    """
    cursor = dbconn.execSQL(conn, sql)
    numrows = cursor.rowcount
    numshown = 0
    res = None
    for row in cursor:
        if numrows != 1:
            #
            # if we got back more than one row
            # we print a few of the rows first
            # instead of immediately raising an exception
            #
            numshown += 1
            if numshown > 10:
                break
            logger.warning(">>> %s" % row)
        else:
            assert res is None
            res = row
            assert res is not None
    cursor.close()
    if numrows != 1:
        raise Exception("SQL returned %d rows, not 1 as expected:\n%s" % (numrows, sql))
    return res

def prepare(self):
    sql = '''
    DROP TABLE IF EXISTS {tablename};
    CREATE TABLE {tablename} (
        c1 INT,
        c2 INT
    ) DISTRIBUTED BY (c1);
    '''.format(tablename=self.tablename)

    conn = dbconn.connect(dbconn.DbURL(dbname=self.dbname))
    dbconn.execSQL(conn, sql)
    self.prepare_extra(conn)
    conn.commit()

def setUp(self):
    ctx.day = 1
    dbconn.execSQL(ctx.conn, '''
    DROP SCHEMA IF EXISTS gpexpand CASCADE;
    CREATE SCHEMA gpexpand;

    CREATE TABLE gpexpand.status (status text, updated timestamp);
    CREATE TABLE gpexpand.status_detail (
        dbname text,
        fq_name text,
        schema_oid oid,
        table_oid oid,
        distribution_policy smallint[],
        distribution_policy_names text,
        distribution_policy_coloids text,
        distribution_policy_type text,
        root_partition_name text,
        storage_options text,
        rank int,
        status text,
        expansion_started timestamp,
        expansion_finished timestamp,
        source_bytes numeric
    );

    INSERT INTO gpexpand.status VALUES
      ( 'SETUP',      '2001-01-01' ),
      ( 'SETUP DONE', '2001-01-02' );

    INSERT INTO gpexpand.status_detail (dbname, fq_name, rank, status) VALUES
      ('fake_db', 'public.t1', 2, 'NOT STARTED'),
      ('fake_db', 'public.t2', 2, 'NOT STARTED');
    '''.format(dbname=ctx.dbname))
    ctx.conn.commit()

    with open(ctx.filename, 'w') as f:
        f.write('''UNINITIALIZED:None
EXPANSION_PREPARE_STARTED:<filename>
BUILD_SEGMENT_TEMPLATE_STARTED:<filename>
BUILD_SEGMENT_TEMPLATE_DONE:None
BUILD_SEGMENTS_STARTED:<filename>
BUILD_SEGMENTS_DONE:<number>
UPDATE_CATALOG_STARTED:<filename>
UPDATE_CATALOG_DONE:None
SETUP_EXPANSION_SCHEMA_STARTED:None
SETUP_EXPANSION_SCHEMA_DONE:None
PREPARE_EXPANSION_SCHEMA_STARTED:None
PREPARE_EXPANSION_SCHEMA_DONE:None
EXPANSION_PREPARE_DONE:None
''')

def count_walsender(self):
    """Returns number of active walsenders from pg_stat_replication."""
    sql = "SELECT count(*) FROM pg_stat_replication"
    with dbconn.connect(dbconn.DbURL(), utility=True) as conn:
        curs = dbconn.execSQL(conn, sql)
        results = curs.fetchall()
    return results[0][0]

def select_version(conn):
    global glob_gpdb_major_version
    sqlStr = "SELECT version()"
    curs = dbconn.execSQL(conn, sqlStr)
    rows = curs.fetchall()
    for row in rows:
        log_output(row[0])
        glob_gpdb_major_version = int(re.sub(".*Greenplum Database ([0-9]*)\..*", "\\1", row[0]))
        log_output("GPDB major version is %d" % glob_gpdb_major_version)

    log_output("Backend pid:")
    sqlStr = "SELECT pg_backend_pid()"
    curs = dbconn.execSQL(conn, sqlStr)
    rows = curs.fetchall()
    for row in rows:
        log_output(str(row[0]))

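# A minimal, self-contained check of the version-parsing regex used above. The
# sample version() string is made up, but it follows the usual Greenplum format.
import re

sample = "PostgreSQL 9.4.24 (Greenplum Database 6.14.1 build commit:abc123) on x86_64-pc-linux-gnu"
major = int(re.sub(".*Greenplum Database ([0-9]*)\..*", "\\1", sample))
assert major == 6
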
def get_hawq_hostname_all(master_port):
    try:
        dburl = dbconn.DbURL(port=master_port, dbname='template1')
        conn = dbconn.connect(dburl, True)
        query = "select role, status, port, hostname, address from gp_segment_configuration;"
        rows = dbconn.execSQL(conn, query)
        conn.close()
    except DatabaseError, ex:
        print "Failed to connect to database, this script can only be run when the database is up."
        sys.exit(1)

def verify(self, conn):
    sql = '''
    select c1 from {tablename} order by c1;
    '''.format(tablename=self.tablename, counter=self.counter)
    results = dbconn.execSQL(conn, sql).fetchall()

    for i in range(self.counter, self.datasize):
        if i != int(results[i - self.counter][0]):
            self.report_incorrect_result()
            return

def process_table_list(table_list, dburl, statement_mem, metadatatable):
    try:
        conn = dbconn.connect(dburl)
        dbconn.execSQL(conn, "SET statement_mem TO '%s'" % statement_mem)
        conn.commit()
        for table in table_list:
            logger.info('    processing table %s...' % table)
            curs = dbconn.execSQL(conn, "select count(*) from %s" % table)
            rowcount = curs.fetchall()[0][0]
            curs = dbconn.execSQL(conn, "select ischeckdistkey from %s where tablename = '%s'" % (metadatatable, table))
            ischeckdistkey = curs.fetchall()[0][0]
            distkeycnt = 'null'
            if ischeckdistkey == 1:
                logger.info('    getting distribution stats for table %s...' % table)
                query_distribution = """
                    select attname
                    from pg_class as c,
                         pg_namespace as n,
                         pg_attribute as a,
                         gp_distribution_policy as d
                    where c.relnamespace = n.oid
                        and a.attrelid = c.oid
                        and n.nspname || '.' || c.relname = '%s'
                        and d.localoid = c.oid
                        and a.attnum = ANY(d.attrnums)
                    """ % table
                curs = dbconn.execSQL(conn, query_distribution)
                distkeys = curs.fetchall()
                if len(distkeys) > 0:
                    distkeysstr = '"' + '","'.join([d[0] for d in distkeys]) + '"'
                    logger.info('    %s is distributed by: %s' % (table, distkeysstr))
                    query = """
                        select count(*)
                        from (
                            select %s
                            from %s
                            group by %s
                        ) as q
                        """ % (distkeysstr, table, distkeysstr)
                    curs = dbconn.execSQL(conn, query)
                    distkeycnt = str(curs.fetchall()[0][0])
                else:
                    logger.info('    %s is distributed randomly, no analysis needed' % table)
            dbconn.execSQL(conn, """
                insert into %s_p (tablename, ischeckdistkey, rowcount, distkeycount)
                values ('%s', %d, %d, %s)
                """ % (metadatatable, table, ischeckdistkey, rowcount, distkeycnt))
            conn.commit()
        conn.close()
    except DatabaseError, ex:
        logger.error('Failed to execute the statement on the database. Please, check log file for errors.')
        logger.error(ex)
        sys.exit(3)

def execute_noret(dburl, query):
    try:
        conn = dbconn.connect(dburl)
        curs = dbconn.execSQL(conn, query)
        conn.commit()
        conn.close()
    except DatabaseError, ex:
        logger.error('Failed to execute the statement on the database. Please, check log file for errors.')
        logger.error(ex)
        sys.exit(3)

def test_pg_inherits(self):
    """
    Change order of children in pg_inherits on segments.  Alter
    should not cause inconsistent OIDs.
    """
    # Create partitioned table.
    sql = local_path("create_part_table.sql")
    out = local_path("create_part_table.out")
    ans = local_path("create_part_table.ans")
    PSQL.run_sql_file(sql, out)
    assert Gpdiff.are_files_equal(out, ans)

    # Change order of children in pg_inherits on segments but not
    # on master.
    sql = local_path("reorder_pg_inherits.sql")
    out = local_path("reorder_pg_inherits.out")
    ans = local_path("reorder_pg_inherits.ans")
    segments = [seg for seg in self.gparray.getSegDbList()
                if seg.role == "p"]
    assert len(segments) > 0, "No primary segments found."
    primary = segments[0]
    PSQL.run_sql_file(sql, out,
                      host=primary.hostname,
                      port=primary.port,
                      PGOPTIONS=("-c allow_system_table_mods=dml "
                                 "-c gp_session_role=utility"))
    assert Gpdiff.are_files_equal(out, ans)

    # Alter the partitioned table so that it's rewritten.
    with dbconn.connect(dbconn.DbURL()) as conn:
        dbconn.execSQL(conn, "ALTER TABLE co1 ALTER COLUMN c2 TYPE int8")
        conn.commit()

    # Run gpcheckcat
    result = GpdbVerify().gpcheckcat(testname="inconsistent")

    # Test return code
    if result[0] != 0:
        logger.error(result[2])  # log output
        self.fail("gpcheckcat 'inconsistent' test failed")

def run_pg_rewind(self, rewindInfo):
    """
    Run pg_rewind for incremental recovery.
    """
    rewindFailedSegments = []
    # Run pg_rewind on all the targets
    for targetSegment, sourceHostName, sourcePort in rewindInfo:
        # Do CHECKPOINT on source to force TimeLineID to be updated in pg_control.
        # pg_rewind wants that to make incremental recovery successful finally.
        self.__logger.debug('Do CHECKPOINT on %s (port: %d) before running pg_rewind.' % (sourceHostName, sourcePort))
        dburl = dbconn.DbURL(hostname=sourceHostName, port=sourcePort, dbname='template1')
        conn = dbconn.connect(dburl, utility=True)
        dbconn.execSQL(conn, "CHECKPOINT")

        # If the postmaster.pid still exists and another process
        # is actively using that pid, pg_rewind will fail when it
        # tries to start the failed segment in single-user
        # mode. It should be safe to remove the postmaster.pid
        # file since we do not expect the failed segment to be up.
        self.remove_postmaster_pid_from_remotehost(
            targetSegment.getSegmentHostName(),
            targetSegment.getSegmentDataDirectory())

        # Run pg_rewind to do incremental recovery.
        cmd = gp.SegmentRewind('segment rewind',
                               targetSegment.getSegmentHostName(),
                               targetSegment.getSegmentDataDirectory(),
                               sourceHostName,
                               sourcePort,
                               verbose=gplog.logging_is_verbose())
        try:
            cmd.run(True)
            self.__logger.debug('pg_rewind results: %s' % cmd.results)
        except base.ExecutionError as e:
            self.__logger.debug("pg_rewind failed for target directory %s." % targetSegment.getSegmentDataDirectory())
            self.__logger.warning("Incremental recovery failed for dbid %s. You must use gprecoverseg -F to recover the segment." % targetSegment.getSegmentDbId())
            rewindFailedSegments.append(targetSegment)

    return rewindFailedSegments

def execute(conn, query):
    res = []
    try:
        curs = dbconn.execSQL(conn, query)
        res = curs.fetchall()
        conn.commit()
    except DatabaseError, ex:
        logger.error('Failed to execute the statement on the database. Please, check log file for errors.')
        logger.error(ex)

def __registerMirrorsInCatalog(self, gpArray):
    self.__logger.info("Updating gp_segment_configuration with mirror info")
    dburl = dbconn.DbURL(dbname='template1', port=gpArray.master.port)
    conn = dbconn.connect(dburl, utility=False)
    query = "select pg_catalog.gp_add_segment_mirror(%s::int2, '%s', '%s', %s, '%s');"

    try:
        for segmentPair in gpArray.getSegmentList():
            mirror = segmentPair.mirrorDB
            filledInQuery = query % (mirror.getSegmentContentId(), mirror.getSegmentAddress(),
                                     mirror.getSegmentAddress(), mirror.getSegmentPort(),
                                     mirror.getSegmentDataDirectory())
            dbconn.execSQL(conn, filledInQuery)
    except Exception as e:
        self.__logger.error("Failed while updating mirror info in gp_segment_configuration: %s" % str(e))
        raise
    else:
        conn.commit()
        self.__logger.info("Successfully updated gp_segment_configuration with mirror info")

def run_SQLQuery(self, exec_sql, dbname, hostname='localhost', port=os.environ['PGPORT']):
    with dbconn.connect(dbconn.DbURL(dbname=dbname, hostname=hostname, port=port)) as conn:
        curs = dbconn.execSQL(conn, exec_sql)
        results = curs.fetchall()
    return results

def create_table_with_column_list(conn, storage_type, schemaname, tablename, col_name_list, col_type_list):
    col_name_list = col_name_list.strip().split(',')
    col_type_list = col_type_list.strip().split(',')
    col_list = ' (' + ','.join(['%s %s' % (x, y) for x, y in zip(col_name_list, col_type_list)]) + ') '

    if storage_type.lower() == 'heap':
        storage_str = ''
    elif storage_type.lower() == 'ao':
        storage_str = " with (appendonly=true) "
    elif storage_type.lower() == 'co':
        storage_str = " with (appendonly=true, orientation=column) "
    else:
        raise Exception("Invalid storage type")

    query = "CREATE TABLE " + schemaname + '.' + tablename + col_list + storage_str + "DISTRIBUTED RANDOMLY"
    dbconn.execSQL(conn, query)
    conn.commit()

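# Illustrative trace (hypothetical inputs, not from the source): calling
#     create_table_with_column_list(conn, 'co', 'public', 't1', 'c1,c2', 'int,text')
# zips the two comma-separated lists into "(c1 int,c2 text)" and executes
#     CREATE TABLE public.t1 (c1 int,c2 text)  with (appendonly=true, orientation=column) DISTRIBUTED RANDOMLY
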
def __init__(self, hostname, port, dbname):
    query = "SELECT dbid, content, port, fselocation, preferred_role FROM gp_segment_configuration s, pg_filespace_entry f WHERE s.dbid = fsedbid"
    print '%s: fetching cluster configuration' % (datetime.datetime.now())
    dburl = dbconn.DbURL(hostname, port, dbname)
    print '%s: fetched cluster configuration' % (datetime.datetime.now())

    try:
        with dbconn.connect(dburl, utility=True) as conn:
            self.seg_configs = dbconn.execSQL(conn, query).fetchall()
    except Exception, e:
        print e
        sys.exit(1)

def run(self):
    # Do CHECKPOINT on source to force TimeLineID to be updated in pg_control.
    # pg_rewind wants that to make incremental recovery successful finally.
    self.logger.debug('Do CHECKPOINT on {} (port: {}) before running pg_rewind.'.format(
        self.recovery_info.source_hostname, self.recovery_info.source_port))
    dburl = dbconn.DbURL(hostname=self.recovery_info.source_hostname,
                         port=self.recovery_info.source_port,
                         dbname='template1')
    conn = dbconn.connect(dburl, utility=True)
    dbconn.execSQL(conn, "CHECKPOINT")
    conn.close()

    # If the postmaster.pid still exists and another process
    # is actively using that pid, pg_rewind will fail when it
    # tries to start the failed segment in single-user
    # mode. It should be safe to remove the postmaster.pid
    # file since we do not expect the failed segment to be up.
    self.remove_postmaster_pid()

def get_primary_segment_host_port():
    """
    return host, port of primary segment (dbid 2)
    """
    FIRST_PRIMARY_DBID = 2
    get_psegment_sql = 'select hostname, port from gp_segment_configuration where dbid=%i;' % FIRST_PRIMARY_DBID
    with dbconn.connect(dbconn.DbURL(dbname='template1')) as conn:
        cur = dbconn.execSQL(conn, get_psegment_sql)
        rows = cur.fetchall()
        primary_seg_host = rows[0][0]
        primary_seg_port = rows[0][1]
    return primary_seg_host, primary_seg_port

def check_db_exists(dbname, host=None, port=0, user=None):
    LIST_DATABASE_SQL = 'SELECT datname FROM pg_database'

    results = []
    with dbconn.connect(dbconn.DbURL(hostname=host, username=user, port=port, dbname='template1')) as conn:
        curs = dbconn.execSQL(conn, LIST_DATABASE_SQL)
        results = curs.fetchall()

    for result in results:
        if result[0] == dbname:
            return True

    return False

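# Hedged usage sketch (not from the source): assumes this helper is importable
# and a cluster is reachable; when host/port/user are left unset, DbURL falls
# back to the usual PGHOST/PGPORT/PGUSER environment defaults. The database
# name 'gptest' is only an example.
if not check_db_exists('gptest'):
    raise Exception('expected database gptest to exist')
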
def _analyze(self, restore_db, restore_tables, master_port):
    conn = None
    try:
        dburl = dbconn.DbURL(port=master_port, dbname=restore_db)
        conn = dbconn.connect(dburl)
        for table in restore_tables:
            logger.info('Commencing analyze of %s in %s database, please wait...' % (table, restore_db))
            try:
                execSQL(conn, 'analyze %s' % table)
                conn.commit()
            except Exception, e:
                logger.warn('Issue with analyze of %s table, check log file for details' % table)
            else:
                logger.info('Analyze of %s table completed without error' % table)
    finally:
        if conn is not None:
            conn.close()

def run_pg_rewind(self, rewindInfo):
    """
    Run pg_rewind for incremental recovery.
    """
    rewindFailedSegments = []
    # Run pg_rewind on all the targets
    for targetSegment, sourceHostName, sourcePort in rewindInfo:
        # Do CHECKPOINT on source to force TimeLineID to be updated in pg_control.
        # pg_rewind wants that to make incremental recovery successful finally.
        self.__logger.debug('Do CHECKPOINT on %s (port: %d) before running pg_rewind.' % (sourceHostName, sourcePort))
        dburl = dbconn.DbURL(hostname=sourceHostName, port=sourcePort, dbname='template1')
        conn = dbconn.connect(dburl, utility=True)
        dbconn.execSQL(conn, "CHECKPOINT")

        # Run pg_rewind to do incremental recovery.
        cmd = gp.SegmentRewind('segment rewind',
                               targetSegment.getSegmentHostName(),
                               targetSegment.getSegmentDataDirectory(),
                               sourceHostName,
                               sourcePort,
                               verbose=gplog.logging_is_verbose())
        try:
            cmd.run(True)
            self.__logger.debug('pg_rewind results: %s' % cmd.results)
        except base.ExecutionError as e:
            self.__logger.debug("pg_rewind failed for target directory %s." % targetSegment.getSegmentDataDirectory())
            self.__logger.warning("Incremental recovery failed for dbid %s. You must use gprecoverseg -F to recover the segment." % targetSegment.getSegmentDbId())
            rewindFailedSegments.append(targetSegment)

    return rewindFailedSegments

def stop_primary(context, content_id):
    get_psegment_sql = 'select datadir, hostname from gp_segment_configuration where content=%i and role=\'p\';' % content_id
    with dbconn.connect(dbconn.DbURL(dbname='template1')) as conn:
        cur = dbconn.execSQL(conn, get_psegment_sql)
        rows = cur.fetchall()
        seg_data_dir = rows[0][0]
        seg_host = rows[0][1]

    # demo_cluster tests that run on the CI fail with 'bash: pg_ctl: command not found',
    # so source greenplum_path.sh to put pg_ctl on the PATH when ssh'ing to a demo cluster.
    subprocess.check_call(['ssh', seg_host,
                           'source %s/greenplum_path.sh && pg_ctl stop -m fast -D %s' % (
                               pipes.quote(os.environ.get("GPHOME")), pipes.quote(seg_data_dir))
                           ])

def verify(self, hostname=None, port=0):
    """
    Verify tablespace functionality by ensuring the tablespace can be
    written to, read from, and the initial data is still correctly
    distributed.
    """
    url = dbconn.DbURL(hostname=hostname, port=port, dbname=self.dbname)
    with closing(dbconn.connect(url, unsetSearchPath=False)) as conn:
        data = dbconn.query(conn, "SELECT gp_segment_id, i FROM tbl").fetchall()

        # verify that we can still write to the tablespace
        self.table_counter += 1
        dbconn.execSQL(conn, "CREATE TABLE tbl_%s (i int) DISTRIBUTED RANDOMLY" % self.table_counter)
        dbconn.execSQL(conn, "INSERT INTO tbl_%s VALUES (GENERATE_SERIES(0, 25))" % self.table_counter)

    if sorted(data) != sorted(self.initial_data):
        raise Exception("Tablespace data is not identically distributed. Expected:\n%r\n but found:\n%r" % (
            sorted(self.initial_data), sorted(data)))

def get_parent_partitions(self):
    with dbconn.connect(dbconn.DbURL(dbname=self.database, port=self.port)) as conn:
        for schema, table, col_list in self.table_list:
            PARENT_PARTITION_TABLENAME = """SELECT schemaname, tablename
                                            FROM pg_partitions
                                            WHERE partitiontablename='%s'
                                            AND partitionschemaname='%s'""" % (table, schema)
            res = execSQL(conn, PARENT_PARTITION_TABLENAME)
            for r in res:
                self.parent_partition_map[(schema, table)] = (r[0], r[1])

            if (schema, table) not in self.parent_partition_map:
                self.parent_partition_map[(schema, table)] = (schema, table)

    return self.parent_partition_map

def drop_external_table(context, table_name, dbname, host=None, port=0, user=None):
    SQL = 'drop external table %s' % table_name
    with dbconn.connect(dbconn.DbURL(hostname=host, port=port, username=user, dbname=dbname)) as conn:
        dbconn.execSQL(conn, SQL)
        conn.commit()
    if check_table_exists(context, table_name=table_name, dbname=dbname, table_type='external',
                          host=host, port=port, user=user):
        raise Exception('Unable to successfully drop the table %s' % table_name)

def refresh(self):
    query = ("SELECT dbid, content, port, datadir, role, preferred_role, status, mode "
             "FROM gp_segment_configuration s WHERE 1 = 1")
    print '%s: fetching cluster configuration' % (datetime.datetime.now())
    dburl = dbconn.DbURL(self.hostname, self.port, self.dbname)
    print '%s: fetched cluster configuration' % (datetime.datetime.now())

    try:
        with dbconn.connect(dburl, utility=True) as conn:
            resultsets = dbconn.execSQL(conn, query).fetchall()
    except Exception, e:
        print e
        sys.exit(1)

def get_substitutions(self):
    substitutions = {}
    tinctest.logger.info("Generating distribution policy snapshot sql for all DBs ...")
    with dbconn.connect(dbconn.DbURL()) as conn:
        cmd = "select datname from pg_database where datname not in ('postgres', 'template1', 'template0')"
        rows = dbconn.execSQL(conn, cmd)
        sql_string = ''
        for row in rows:
            sql_string += '\\c %s \n' % row[0].strip()
            sql_string += """select n.nspname || '.' || c.relname as fq_name, p.attrnums as distribution_policy FROM pg_class c JOIN pg_namespace n ON (c.relnamespace=n.oid) JOIN pg_catalog.gp_distribution_policy p on (c.oid = p.localoid) WHERE n.nspname != 'gpexpand' AND n.nspname != 'pg_bitmapindex' AND c.relstorage != 'x';"""
            sql_string += '\n'
    substitutions['@distribution_policy_snapshot_sql@'] = sql_string
    return substitutions

def create_int_table(context, table_name, table_type='heap', dbname='testdb'):
    CREATE_TABLE_SQL = None
    NROW = 1000

    table_type = table_type.upper()
    if table_type == 'AO':
        CREATE_TABLE_SQL = 'create table %s WITH(APPENDONLY=TRUE) as select generate_series(1,%d) as c1' % (
            table_name, NROW)
    elif table_type == 'CO':
        CREATE_TABLE_SQL = 'create table %s WITH(APPENDONLY=TRUE, orientation=column) as select generate_series(1, %d) as c1' % (
            table_name, NROW)
    elif table_type == 'HEAP':
        CREATE_TABLE_SQL = 'create table %s as select generate_series(1, %d) as c1' % (table_name, NROW)

    if CREATE_TABLE_SQL is None:
        raise Exception('Invalid table type specified')

    SELECT_TABLE_SQL = 'select count(*) from %s' % table_name
    with closing(dbconn.connect(dbconn.DbURL(dbname=dbname), unsetSearchPath=False)) as conn:
        dbconn.execSQL(conn, CREATE_TABLE_SQL)

        result = dbconn.querySingleton(conn, SELECT_TABLE_SQL)
        if result != NROW:
            raise Exception('Integer table creation was not successful. Expected %d does not match %d' % (NROW, result))

def check_skew(self):
    """Check that all tables have been distributed reasonably"""
    tables = []
    logger.info("Checking skew ...")
    with dbconn.connect(dbconn.DbURL(dbname=self.test_database)) as conn:
        query = "select fq_name from gpexpand.status_detail where dbname = '%s'" % self.test_database
        rows = dbconn.execSQL(conn, query)
        for row in rows:
            tables.append(row[0].partition(".")[2])
        for table in tables:
            query = "select data.segid, data.segtupcount from gp_toolkit.gp_skew_details( (select oid from pg_class where relname = 't1')) as data"
            rows = dbconn.execSQL(conn, query)
            tuplecounts = []
            for row in rows:
                segtupcount = row[1]
                tuplecounts.append(segtupcount)
            if not self.check_random_dist_tuple_count_skew(table, tuplecounts):
                raise Exception("Table %s has not been redistributed well. Check skew." % table)
    return True

def validate_columns(self, schema_name, table_name, sort_column_list):
    columns = []
    with dbconn.connect(dbconn.DbURL(dbname=self.database, port=self.port)) as conn:
        res = execSQL(conn,
                      """SELECT attname
                         FROM pg_attribute
                         WHERE attrelid = (SELECT pg_class.oid
                                           FROM pg_class, pg_namespace
                                           WHERE pg_class.relname = '{table}' AND pg_namespace.nspname = '{schema}')"""
                      .format(table=table_name, schema=schema_name))
        for cols in res.fetchall():
            columns.append(cols[0].strip())
        for c in sort_column_list:
            if c[0] not in columns:
                raise ExceptionNoStackTraceNeeded('Table {schema}.{table} does not have column {col}'
                                                  .format(schema=schema_name, table=table_name, col=c[0]))

def test_TableMainColumn_tablenames_exist(self):
    gpcheckcat_file = os.path.abspath(os.path.dirname(__file__) + "/../../../gpcheckcat")
    subject = imp.load_source('gpcheckcat', gpcheckcat_file)

    dburl = dbconn.DbURL(hostname=os.getenv('HOSTNAME', 'localhost'),
                         port=os.getenv('PGPORT', 5432),
                         dbname=os.getenv('PGDATABASE', 'postgres'))
    conn = dbconn.connect(dburl)
    table_query = "select count(*) from pg_class where relname='{table_name}'"

    # 5.json has an incomplete list of catalog tables
    # src/backend/catalog has .h files for some catalog tables
    # gpdb-doc/dita/ref_guide/system_catalogs/ has .xml files for almost all catalog tables
    for key in subject.TableMainColumn.keys():
        cursor = dbconn.execSQL(conn, table_query.format(table_name=key))
        self.assertTrue(cursor.rowcount == 1, "%s not found in catalog dir" % key)
