def validate_mid_level_partitions(self, schema_name, table_name): partition_level, max_level = None, None conn = dbconn.connect( dbconn.DbURL(dbname=self.database, port=self.port)) try: parent_schema, parent_table = self.parent_partition_map[( schema_name, table_name)] if (parent_schema, parent_table) == (schema_name, table_name): return try: res = dbconn.query( conn, """ SELECT isleaf from pg_partition_tree('%s.%s') """ % (schema_name, table_name)) except Exception as e: logger.debug( 'Unable to get the partition information for table %s: (%s)' % (table_name, str(e))) for isleaf in res.fetchall(): if not isleaf[0]: raise Exception( 'Mid Level partition %s.%s is not supported by gpreload. Please specify only leaf partitions or parent table name' % (schema_name, table_name)) finally: conn.close()
def run(self): logger.debug('Executing query (%s:%s) for segment (%s:%s) on database (%s)' % ( self.qname, self.query, self.hostname, self.port, self.dbname)) with dbconn.connect(dbconn.DbURL(hostname=self.hostname, port=self.port, dbname=self.dbname), utility=True) as conn: self.res = dbconn.query(conn, self.query).fetchall() conn.close()
def _get_dblist(self): # template0 does not accept any connections so we exclude it with closing(dbconn.connect(dbconn.DbURL())) as conn: res = dbconn.query( conn, "SELECT datname FROM PG_DATABASE WHERE datname != 'template0'") return res.fetchall()
def explain_index_scan(conn, sqlStr): cost = -1.0 scan_type = "" try: log_output("") log_output("Executing query: %s" % ("explain " + sqlStr)) exp_curs = dbconn.query(conn, "explain " + sqlStr) rows = exp_curs.fetchall() table_scan_pattern = TABLE_SCAN_PATTERN index_scan_pattern = INDEX_SCAN_PATTERN bitmap_scan_pattern = BITMAP_SCAN_PATTERN if (glob_gpdb_major_version) <= 5: table_scan_pattern = TABLE_SCAN_PATTERN_V5 index_scan_pattern = INDEX_SCAN_PATTERN_V5 bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5 for row in rows: log_output(row[0]) if re.search(TABLE_NAME_PATTERN, row[0]) or re.search( NDV_TABLE_NAME_PATTERN, row[0]): if re.search(bitmap_scan_pattern, row[0]): scan_type = BITMAP_SCAN cost = cost_from_explain_line(row[0]) elif re.search(index_scan_pattern, row[0]): scan_type = INDEX_SCAN cost = cost_from_explain_line(row[0]) elif re.search(table_scan_pattern, row[0]): scan_type = TABLE_SCAN cost = cost_from_explain_line(row[0]) except Exception as e: log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" % ("explain " + sqlStr, e)) return (scan_type, cost)
def query_sql(dbname, sql): result = None with dbconn.connect(dbconn.DbURL(dbname=dbname), unsetSearchPath=False) as conn: result = dbconn.query(conn, sql) return result
def __init__(self, name): self.name = name self.path = tempfile.mkdtemp() self.dbname = 'tablespace_db_%s' % name self.table_counter = 0 self.initial_data = None gparray = GpArray.initFromCatalog(dbconn.DbURL()) for host in gparray.getHostList(): run_cmd('ssh %s mkdir -p %s' % (pipes.quote(host), pipes.quote(self.path))) conn = dbconn.connect(dbconn.DbURL(), unsetSearchPath=False) dbconn.execSQL( conn, "CREATE TABLESPACE %s LOCATION '%s'" % (self.name, self.path)) dbconn.execSQL( conn, "CREATE DATABASE %s TABLESPACE %s" % (self.dbname, self.name)) conn.close() conn = dbconn.connect(dbconn.DbURL(dbname=self.dbname), unsetSearchPath=False) dbconn.execSQL(conn, "CREATE TABLE tbl (i int) DISTRIBUTED RANDOMLY") dbconn.execSQL(conn, "INSERT INTO tbl VALUES (GENERATE_SERIES(0, 25))") # save the distributed data for later verification self.initial_data = dbconn.query( conn, "SELECT gp_segment_id, i FROM tbl").fetchall() conn.close()
def verify_for_gpexpand(self, hostname=None, port=0): """ For gpexpand, we need make sure: 1. data is the same after redistribution finished 2. the table's numsegments is enlarged to the new cluster size """ url = dbconn.DbURL(hostname=hostname, port=port, dbname=self.dbname) with closing(dbconn.connect(url, unsetSearchPath=False)) as conn: data = dbconn.query(conn, "SELECT gp_segment_id, i FROM tbl").fetchall() tbl_numsegments = dbconn.querySingleton( conn, "SELECT numsegments FROM gp_distribution_policy " "WHERE localoid = 'tbl'::regclass::oid") num_segments = dbconn.querySingleton( conn, "SELECT COUNT(DISTINCT(content)) - 1 FROM gp_segment_configuration" ) if tbl_numsegments != num_segments: raise Exception( "After gpexpand the numsegments for tablespace table 'tbl' %d does not match " "the number of segments in the cluster %d." % (tbl_numsegments, num_segments)) initial_data = [i for _, i in self.initial_data] data_without_segid = [i for _, i in data] if sorted(data_without_segid) != sorted(initial_data): raise Exception( "Tablespace data is not identically distributed after running gp_expand. " "Expected pre-gpexpand data:\n%\n but found post-gpexpand data:\n%r" % (sorted(self.initial_data), sorted(data)))
def __fetchSingleOutputRow(self, conn, sql, retry=False): """ Execute specified SQL command and return what we expect to be a single row. Raise an exception when more or fewer than one row is seen and when more than one row is seen display up to 10 rows as logger warnings. """ cursor = dbconn.query(conn, sql) numrows = cursor.rowcount numshown = 0 res = None for row in cursor: if numrows != 1: # # if we got back more than one row # we print a few of the rows first # instead of immediately raising an exception # numshown += 1 if numshown > 10: break logger.warning('>>> %s' % row) else: assert res is None res = row assert res is not None cursor.close() if numrows != 1: raise Exception("SQL returned %d rows, not 1 as expected:\n%s" % (numrows, sql)) return res
def verify(self, hostname=None, port=0): """ Verify tablespace functionality by ensuring the tablespace can be written to, read from, and the initial data is still correctly distributed. """ url = dbconn.DbURL(hostname=hostname, port=port, dbname=self.dbname) with dbconn.connect(url, unsetSearchPath=False) as conn: data = dbconn.query(conn, "SELECT gp_segment_id, i FROM tbl").fetchall() # verify that we can still write to the tablespace self.table_counter += 1 dbconn.execSQL( conn, "CREATE TABLE tbl_%s (i int) DISTRIBUTED RANDOMLY" % self.table_counter) dbconn.execSQL( conn, "INSERT INTO tbl_%s VALUES (GENERATE_SERIES(0, 25))" % self.table_counter) conn.close() if sorted(data) != sorted(self.initial_data): raise Exception( "Tablespace data is not identically distributed. Expected:\n%r\n but found:\n%r" % (sorted(self.initial_data), sorted(data)))
def getRows(dbname, exec_sql): with closing( dbconn.connect(dbconn.DbURL(dbname=dbname), unsetSearchPath=False)) as conn: curs = dbconn.query(conn, exec_sql) results = curs.fetchall() return results
def _get_dist_policies(self): policies = [] with dbconn.connect(dbconn.DbURL(), unsetSearchPath=False) as conn: cursor = dbconn.query(conn, 'select * from gp_distribution_policy;').fetchall() for row in cursor: policies.append(row) conn.close() return policies
def basicSQLExec(conn, sql): cursor = None try: cursor = dbconn.query(conn, sql) rows = cursor.fetchall() return rows finally: if cursor: cursor.close()
def select_version(conn): global glob_gpdb_major_version sqlStr = "SELECT version()" curs = dbconn.query(conn, sqlStr) rows = curs.fetchall() for row in rows: log_output(row[0]) glob_gpdb_major_version = int( re.sub(".*Greenplum Database ([0-9]*)\..*", "\\1", row[0])) log_output("GPDB major version is %d" % glob_gpdb_major_version) log_output("Backend pid:") sqlStr = "SELECT pg_backend_pid()" curs = dbconn.query(conn, sqlStr) rows = curs.fetchall() for row in rows: log_output(str(row[0]))
def verify(self, conn): sql = ''' select c1 from {tablename} order by c1; '''.format(tablename=self.tablename, counter=self.counter) results = dbconn.query(conn, sql).fetchall() for i in range(self.counter, self.datasize): if i != int(results[i - self.counter][0]): self.report_incorrect_result() return
def dropSchemaIfExist(conn, schemaname): sql = "SELECT nspname FROM pg_catalog.pg_namespace WHERE nspname = '%s'" % schemaname dropsql = "DROP SCHEMA %s" % schemaname cursor = None try: cursor = dbconn.query(conn, sql) numrows = cursor.rowcount if numrows == 1: cursor = dbconn.query(conn, dropsql) else: raise CatalogError("more than one entry in pg_namespace for '%s'" % schemaname) except Exception as e: raise CatalogError("error dropping schema %s: %s" % (schemaname, str(e))) finally: if cursor: cursor.close() conn.commit()
def get_primary_segment_host_port_for_content(content='0'): """ return host, port of primary segment for the content id """ get_psegment_sql = "SELECT hostname, port FROM gp_segment_configuration WHERE content=%s AND role='p';" % content with closing(dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn: cur = dbconn.query(conn, get_psegment_sql) rows = cur.fetchall() primary_seg_host = rows[0][0] primary_seg_port = rows[0][1] return primary_seg_host, primary_seg_port
def check_db_exists(dbname, host=None, port=0, user=None): LIST_DATABASE_SQL = 'SELECT datname FROM pg_database' results = [] with closing(dbconn.connect(dbconn.DbURL(hostname=host, username=user, port=port, dbname='template1'), unsetSearchPath=False)) as conn: curs = dbconn.query(conn, LIST_DATABASE_SQL) results = curs.fetchall() for result in results: if result[0] == dbname: return True return False
def get_primary_segment_host_port(): """ return host, port of primary segment (dbid 2) """ FIRST_PRIMARY_DBID = 2 get_psegment_sql = 'select hostname, port from gp_segment_configuration where dbid=%i;' % FIRST_PRIMARY_DBID with closing(dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn: cur = dbconn.query(conn, get_psegment_sql) rows = cur.fetchall() primary_seg_host = rows[0][0] primary_seg_port = rows[0][1] return primary_seg_host, primary_seg_port
def stop_primary(context, content_id): get_psegment_sql = 'select datadir, hostname from gp_segment_configuration where content=%i and role=\'p\';' % content_id with closing(dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn: cur = dbconn.query(conn, get_psegment_sql) rows = cur.fetchall() seg_data_dir = rows[0][0] seg_host = rows[0][1] # For demo_cluster tests that run on the CI gives the error 'bash: pg_ctl: command not found' # Thus, need to add pg_ctl to the path when ssh'ing to a demo cluster. subprocess.check_call(['ssh', seg_host, 'source %s/greenplum_path.sh && pg_ctl stop -m fast -D %s' % ( pipes.quote(os.environ.get("GPHOME")), pipes.quote(seg_data_dir)) ])
def impl(context, filename, content): line = "" with closing( dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn: result = dbconn.query( conn, "SELECT port, hostname, datadir FROM gp_segment_configuration WHERE preferred_role='p' AND content = %s;" % content).fetchall() port, hostname, datadir = result[0][0], result[0][1], result[0][2] line = "%s|%s|%s %s|%s|/tmp/newdir" % (hostname, port, datadir, hostname, port) with open("/tmp/%s" % filename, "w") as fd: fd.write("%s\n" % line)
def doesSchemaExist(conn, schemaname): sql = "SELECT nspname FROM pg_catalog.pg_namespace WHERE nspname = '%s'" % schemaname cursor = None try: cursor = dbconn.query(conn, sql) numrows = cursor.rowcount if numrows == 0: return False elif numrows == 1: return True else: raise CatalogError("more than one entry in pg_namespace for '%s'" % schemaname) finally: if cursor: cursor.close()
def refresh(self): query = ( "SELECT dbid, content, port, datadir, role, preferred_role, status, mode " "FROM gp_segment_configuration s WHERE 1 = 1") print '%s: fetching cluster configuration' % (datetime.datetime.now()) dburl = dbconn.DbURL(self.hostname, self.port, self.dbname) print '%s: fetched cluster configuration' % (datetime.datetime.now()) try: with dbconn.connect(dburl, utility=True, unsetSearchPath=False) as conn: resultsets = dbconn.query(conn, query).fetchall() except Exception, e: print e sys.exit(1)
def test_TableMainColumn_tablenames_exist(self): gpcheckcat_file = os.path.abspath( os.path.dirname(__file__) + "/../../../gpcheckcat") subject = imp.load_source('gpcheckcat', gpcheckcat_file) dburl = dbconn.DbURL(hostname=os.getenv('HOSTNAME', 'localhost'), port=os.getenv('PGPORT', 5432), dbname=os.getenv('PGDATABASE', 'postgres')) conn = dbconn.connect(dburl) table_query = "select count(*) from pg_class where relname='{table_name}'" # 5.json has an incomplete list of catalog tables # src/backend/catalog has .h files for some catalog tables # gpdb-doc/dita/ref_guide/system_catalogs/ has .xml files for almost all catalog tables for key in subject.TableMainColumn.keys(): cursor = dbconn.query(conn, table_query.format(table_name=key)) self.assertTrue(cursor.rowcount == 1, "%s not found in catalog dir" % key)
def explain_join_scan(conn, sqlStr): cost = -1.0 scan_type = "" try: log_output("") log_output("Executing query: %s" % ("explain " + sqlStr)) exp_curs = dbconn.query(conn, "explain " + sqlStr) rows = exp_curs.fetchall() hash_join_pattern = HASH_JOIN_PATTERN nl_join_pattern = NL_JOIN_PATTERN table_scan_pattern = TABLE_SCAN_PATTERN index_scan_pattern = INDEX_SCAN_PATTERN bitmap_scan_pattern = BITMAP_SCAN_PATTERN if (glob_gpdb_major_version) <= 5: hash_join_pattern = HASH_JOIN_PATTERN_V5 nl_join_pattern = NL_JOIN_PATTERN_V5 table_scan_pattern = TABLE_SCAN_PATTERN_V5 index_scan_pattern = INDEX_SCAN_PATTERN_V5 bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5 # save the cost of the join above the scan type for row in rows: log_output(row[0]) if re.search(nl_join_pattern, row[0]): cost = cost_from_explain_line(row[0]) elif re.search(hash_join_pattern, row[0]): cost = cost_from_explain_line(row[0]) # mark the scan type used underneath the join if re.search(TABLE_NAME_PATTERN, row[0]) or re.search( BFV_TABLE_NAME_PATTERN, row[0]): if re.search(bitmap_scan_pattern, row[0]): scan_type = BITMAP_SCAN elif re.search(index_scan_pattern, row[0]): scan_type = INDEX_SCAN elif re.search(table_scan_pattern, row[0]): scan_type = TABLE_SCAN except Exception as e: log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" % ("explain " + sqlStr, e)) return (scan_type, cost)
def get_parent_partitions(self): with closing( dbconn.connect( dbconn.DbURL(dbname=self.database, port=self.port))) as conn: for schema, table, col_list in self.table_list: PARENT_PARTITION_TABLENAME = """SELECT schemaname, tablename FROM pg_partitions WHERE partitiontablename='%s' AND partitionschemaname='%s'""" % ( table, schema) res = dbconn.query(conn, PARENT_PARTITION_TABLENAME) for r in res: self.parent_partition_map[(schema, table)] = (r[0], r[1]) if (schema, table) not in self.parent_partition_map: self.parent_partition_map[(schema, table)] = (schema, table) return self.parent_partition_map
def wait_for_desired_query_result_on_segment(host, port, query, desired_result, num_retries=150): """ Tries once a second to check for the desired query result on the segment. Raises an Exception after failing <num_retries> times. """ attempt = 0 actual_result = None url = dbconn.DbURL(hostname=host, port=port, dbname='template1') while (attempt < num_retries) and (actual_result != desired_result): attempt += 1 try: with closing(dbconn.connect(url, utility=True)) as conn: cursor = dbconn.query(conn, query) rows = cursor.fetchall() actual_result = rows[0][0] except Exception as e: print('could not query segment (%s:%s) %s' % (host, port, e)) time.sleep(1) if attempt == num_retries: raise Exception('Timed out after %s retries' % num_retries)
def validate_columns(self, schema_name, table_name, sort_column_list): columns = [] conn = dbconn.connect( dbconn.DbURL(dbname=self.database, port=self.port)) try: res = dbconn.query( conn, """SELECT attname FROM pg_attribute WHERE attrelid = (SELECT pg_class.oid FROM pg_class, pg_namespace WHERE pg_class.relname = '{table}' AND pg_namespace.nspname = '{schema}')""" .format(table=table_name, schema=schema_name)) for cols in res.fetchall(): columns.append(cols[0].strip()) for c in sort_column_list: if c[0] not in columns: raise ExceptionNoStackTraceNeeded( 'Table {schema}.{table} does not have column {col}'. format(schema=schema_name, table=table_name, col=c[0])) finally: conn.close()
def get_parent_partitions(self): with closing( dbconn.connect( dbconn.DbURL(dbname=self.database, port=self.port))) as conn: for schema, table, col_list in self.table_list: PARENT_PARTITION_TABLENAME = """SELECT nspname, relname FROM pg_catalog.pg_class AS c JOIN pg_catalog.pg_namespace AS ns ON c.relnamespace = ns.oid WHERE c.oid = pg_partition_root('%s.%s') """ % (schema, table) res = dbconn.query(conn, PARENT_PARTITION_TABLENAME) for r in res: self.parent_partition_map[(schema, table)] = (r[0], r[1]) if (schema, table) not in self.parent_partition_map: self.parent_partition_map[(schema, table)] = (schema, table) return self.parent_partition_map
def wait_for_desired_query_result(dburl, query, desired_result, utility=False): """ Tries once a second to check for the desired query result on the segment. Raises an Exception after failing <num_retries> times. """ attempt = 0 num_retries = 150 actual_result = None while (attempt < num_retries) and (actual_result != desired_result): attempt += 1 try: with closing(dbconn.connect(dburl, utility=utility)) as conn: cursor = dbconn.query(conn, query) rows = cursor.fetchall() actual_result = rows[0][0] except Exception as e: print('could not query (%s:%s) %s' % (dburl.pghost, dburl.pgport, e)) time.sleep(1) if attempt == num_retries: raise Exception('Timed out after %s retries' % num_retries)
def get_tablespace_locations(all_hosts, mirror_data_directory): """ to get user defined tablespace locations for all hosts or a specific mirror data directory. :param all_hosts: boolean type to indicate if tablespace locations should be fetched from all hosts. Only gpdeletesystem will call it with True :param mirror_data_directory: string type to fetch tablespace locations for a specific mirror data directory. Only gpmovemirrors will call it with specific data directory :return: list of tablespace locations """ tablespace_locations = [] oid_subq = """ (SELECT * FROM ( SELECT oid FROM pg_tablespace WHERE spcname NOT IN ('pg_default', 'pg_global') ) AS _q1, LATERAL gp_tablespace_location(_q1.oid) ) AS t """ with dbconn.connect(dbconn.DbURL()) as conn: if all_hosts: tablespace_location_sql = """ SELECT c.hostname, t.tblspc_loc||'/'||c.dbid tblspc_loc FROM {oid_subq} JOIN gp_segment_configuration AS c ON t.gp_segment_id = c.content """.format(oid_subq=oid_subq) else: tablespace_location_sql = """ SELECT c.hostname,c.content, t.tblspc_loc||'/'||c.dbid tblspc_loc FROM {oid_subq} JOIN gp_segment_configuration AS c ON t.gp_segment_id = c.content AND c.role='m' AND c.datadir ='{mirror_data_directory}' """.format(oid_subq=oid_subq, mirror_data_directory=mirror_data_directory) res = dbconn.query(conn, tablespace_location_sql) for r in res: tablespace_locations.append(r) return tablespace_locations