def copy_data(self, new_db, options, new_server=None, connections=1):
    """Copy the data for the tables.

    This method will copy the data for all of the tables to another,
    new database. The method will process an input file with INSERT
    statements if the option was selected by the caller.

    new_db[in]         Name of the new database
    options[in]        Options for copy e.g. force, etc.
    new_server[in]     Connection to another server for copying the db
                       Default is None (copy to same server - clone)
    connections[in]    Number of threads(connections) to use for insert

    Raises UtilDBError if a Table object cannot be created for a table.
    """
    # Imported locally to avoid a circular import at module load time.
    from mysql.utilities.common.table import Table

    # Must call init() first!
    # Guard for init() prerequisite
    # NOTE(review): assert is stripped under "python -O"; callers must
    # still guarantee init() was called before copy_data().
    assert self.init_called, "You must call db.init() before "+ \
                             "db.copy_data()."

    # Nothing to do when the caller asked to skip data.
    if self.skip_data:
        return

    self.destination = new_server

    # We know we're cloning if there is no new connection.
    self.cloning = (new_server == self.source)
    if self.cloning:
        self.destination = self.source

    quiet = options.get("quiet", False)

    # Options passed to each Table instance created below.
    tbl_options = {
        'verbose'  : self.verbose,
        'get_cols' : True,
        'quiet'    : quiet
    }

    # Copy the data table-by-table; get_db_objects(_TABLE) returns rows
    # whose first element is the table name.
    table_names = [obj[0] for obj in self.get_db_objects(_TABLE)]
    for tblname in table_names:
        if not quiet:
            print "# Copying data for TABLE %s.%s" % (self.db_name,
                                                      tblname)
        # Quote the table part; q_db_name is already quoted.
        tbl = Table(self.source,
                    "%s.%s" % (self.q_db_name,
                               quote_with_backticks(tblname)),
                    tbl_options)
        if tbl is None:
            raise UtilDBError("Cannot create table object before copy.",
                              -1, self.db_name)

        tbl.copy_data(self.destination, self.cloning, new_db, connections)
def copy_data(self, new_db, options, new_server=None, connections=1):
    """Copy the data for the tables.

    This method will copy the data for all of the tables to another,
    new database. The method will process an input file with INSERT
    statements if the option was selected by the caller.

    Foreign key checks are disabled on the destination for the duration
    of the copy and re-enabled afterwards.

    new_db[in]         Name of the new database
    options[in]        Options for copy e.g. force, etc.
    new_server[in]     Connection to another server for copying the db
                       Default is None (copy to same server - clone)
    connections[in]    Number of threads(connections) to use for insert

    Raises UtilDBError if a Table object cannot be created for a table.
    """
    # Imported locally to avoid a circular import at module load time.
    from mysql.utilities.common.table import Table

    # Must call init() first!
    # Guard for init() prerequisite
    # NOTE(review): assert is stripped under "python -O"; callers must
    # still guarantee init() was called before copy_data().
    assert self.init_called, "You must call db.init() before " + \
                             "db.copy_data()."

    # Nothing to do when the caller asked to skip data.
    if self.skip_data:
        return

    self.destination = new_server

    # We know we're cloning if there is no new connection.
    self.cloning = new_server == self.source

    if self.cloning:
        self.destination = self.source

    quiet = options.get("quiet", False)

    # Options passed to each Table instance created below.
    tbl_options = {"verbose": self.verbose, "get_cols": True,
                   "quiet": quiet}

    # Turn off foreign keys if they were on at the start
    self.destination.disable_foreign_key_checks(True)

    # Copy the data table-by-table; get_db_objects(_TABLE) returns rows
    # whose first element is the table name.
    table_names = [obj[0] for obj in self.get_db_objects(_TABLE)]
    for tblname in table_names:
        if not quiet:
            print "# Copying data for TABLE `%s`.`%s`" % (self.db_name,
                                                          tblname)
        tbl = Table(self.source,
                    "`%s`.`%s`" % (self.db_name, tblname),
                    tbl_options)
        if tbl is None:
            raise UtilDBError("Cannot create table object before copy.",
                              -1, self.db_name)

        tbl.copy_data(self.destination, self.cloning, new_db,
                      connections)

    # Now, turn on foreign keys if they were on at the start
    self.destination.disable_foreign_key_checks(False)
def _process_data(tbl_name, statements, columns, table_col_list,
                  table_rows, skip_blobs):
    """Build bulk INSERT statements for the rows of one table.

    Creates a Table reference for *tbl_name* on the module-level
    ``destination`` server, obtains the column metadata (from the live
    table if it exists, otherwise from the supplied column lists), and
    appends the generated INSERT statements — and, unless *skip_blobs*
    is set, the blob UPDATE statements — to *statements* in place.

    Raises UtilError when no column metadata can be determined.
    """
    target_tbl = Table(destination, tbl_name)

    # Prefer live metadata; fall back to the caller-supplied column
    # definitions when the table has not been created yet.
    if target_tbl.exists():
        target_tbl.get_column_metadata()
        have_col_meta = True
    elif len(table_col_list) > 0:
        have_col_meta = _get_column_metadata(target_tbl, table_col_list)
    else:
        have_col_meta = _get_column_metadata(
            target_tbl, [(target_tbl.tbl_name, columns)])

    if not have_col_meta:
        raise UtilError("Cannot build bulk insert statements without "
                        "the table definition.")

    # make_bulk_insert() returns (insert_statements, blob_updates).
    inserts, blob_updates = target_tbl.make_bulk_insert(
        table_rows, target_tbl.q_db_name)
    if len(inserts) > 0:
        statements.extend(inserts)
    if len(blob_updates) > 0 and not skip_blobs:
        statements.extend(blob_updates)
def _export_table_data(source_srv, table, output_file, options):
    """Export the table data.

    This private method retrieves the data for the specified table in SQL
    format (e.g., INSERT statements) or in a tabular form (GRID, TAB, CSV,
    VERTICAL) to the specified output file or a separated file, according
    to the defined options.

    source_srv[in]  Server instance or dictionary with connection values.
    table[in]       Table to export, tuple with database name and table
                    name.
    output_file[in] Output file to store the export data.
    options[in]     Dictionary containing the options for the export:
                    (skip_tables, skip_views, skip_triggers, skip_procs,
                    skip_funcs, skip_events, skip_grants, skip_create,
                    skip_data, no_header, display, format, file_per_tbl,
                    and debug).

    return a filename if a temporary file is created to store the output
    result (used for multiprocessing) otherwise None.
    """
    frmt = options.get("format", "sql")
    no_headers = options.get("no_headers", True)
    single = options.get("single", False)
    skip_blobs = options.get("skip_blobs", False)
    quiet = options.get("quiet", False)
    file_per_table = options.get("file_per_tbl", False)

    # Handle source server instance or server connection values.
    # Note: For multiprocessing the use of connection values instead of a
    # server instance is required to avoid internal errors.
    if isinstance(source_srv, Server):
        source = source_srv
    else:
        # Get source server instance from connection values.
        conn_options = {
            'quiet': True,  # Avoid repeating output for multiprocessing.
            'version': "5.1.30",
        }
        servers = connect_servers(source_srv, None, conn_options)
        source = servers[0]

    # Must be after the connection test to get SQL_MODE
    sql_mode = source.select_variable("SQL_MODE")

    # Handle qualified table name (with backtick quotes).
    db_name = table[0]
    tbl_name = "{0}.{1}".format(db_name, table[1])
    q_db_name = quote_with_backticks(db_name, sql_mode)
    q_tbl_name = "{0}.{1}".format(q_db_name,
                                  quote_with_backticks(table[1], sql_mode))

    # Determine output file to store exported table data.
    if file_per_table:
        # Store result of table export to a separated file.
        file_name = _generate_tbl_filename(tbl_name, frmt)
        outfile = open(file_name, "w+")
        tempfile_used = False
    else:
        if output_file:
            # Output file to store result is defined.
            outfile = output_file
            tempfile_used = False
        else:
            # Store result in a temporary file (merged later).
            # Used by multiprocess export.
            tempfile_used = True
            outfile = tempfile.NamedTemporaryFile(delete=False)

    message = "# Data for table {0}:".format(q_tbl_name)
    outfile.write("{0}\n".format(message))

    tbl_options = {'verbose': False, 'get_cols': True, 'quiet': quiet}
    cur_table = Table(source, q_tbl_name, tbl_options)
    # Retrieval mode -1 fetches all rows in one pass (single-statement
    # output); mode 1 streams row sets.
    if single and frmt not in ("sql", "grid", "vertical"):
        retrieval_mode = -1
        first = True
    else:
        retrieval_mode = 1
        first = False

    # Find if we have some UNIQUE NOT NULL column indexes.
    unique_indexes = len(cur_table.get_not_null_unique_indexes())

    # If all columns are BLOBS or there aren't any UNIQUE NOT NULL indexes
    # then rows won't be correctly copied using the update statement,
    # so we must warn the user.
    if (not skip_blobs and frmt == "sql" and
            (cur_table.blob_columns == len(cur_table.column_names) or
             (not unique_indexes and cur_table.blob_columns))):
        print("# WARNING: Table {0}.{1} contains only BLOB and TEXT "
              "fields. Rows will be generated with separate INSERT "
              "statements.".format(cur_table.q_db_name,
                                   cur_table.q_tbl_name))

    for data_rows in cur_table.retrieve_rows(retrieval_mode):
        _export_row(data_rows, cur_table, frmt, single, skip_blobs,
                    first, no_headers, outfile)
        if first:
            # Only the first row set emits headers.
            first = False

    if file_per_table:
        outfile.close()

    return outfile.name if tempfile_used else None
def check_index(src_val, table_args, options): """Check for duplicate or redundant indexes for one or more tables This method will examine the indexes for one or more tables and identify any indexes that are potential duplicates or redundant. It prints the equivalent DROP statements if selected. src_val[in] a dictionary containing connection information for the source including: (user, password, host, port, socket) table_args[in] list of tables in the form 'db.table' or 'db' options[in] dictionary of options to include: show-drops : show drop statements for dupe indexes skip : skip non-existant tables verbosity : print extra information show-indexes : show all indexes for each table index-format : index format = sql, table, tab, csv worst : show worst performing indexes best : show best performing indexes Returns bool True = success, raises UtilError if error """ # Get options show_drops = options.get("show-drops", False) skip = options.get("skip", False) verbosity = options.get("verbosity", False) show_indexes = options.get("show-indexes", False) index_format = options.get("index-format", False) stats = options.get("stats", False) first_indexes = options.get("best", None) last_indexes = options.get("worst", None) from mysql.utilities.common.server import connect_servers from mysql.utilities.common.database import Database from mysql.utilities.common.table import Table # Try to connect to the MySQL database server. conn_options = { 'quiet' : verbosity == 1, 'version' : "5.0.0", } servers = connect_servers(src_val, None, conn_options) source = servers[0] db_list = [] # list of databases table_list = [] # list of all tables to process # Build a list of objects to process # 1. start with db_list if no obects present on command line # 2. process command line options. # 3. loop through database list and add all tables # 4. check indexes # Perform the options check here. Loop through objects presented. for obj in table_args: # If a . 
appears, we are operating on a specific table idx = obj.count(".") if (idx == 1): table_list.append(obj) # Else we are operating on a specific database. else: db_list.append(obj) # Loop through database list adding tables for db in db_list: db_source = Database(source, db) db_source.init() tables = db_source.get_db_objects("TABLE") if not tables and verbosity >= 1: print "# Warning: database %s does not exist. Skipping." % (db) for table in tables: table_list.append(db + "." + table[0]) # Fail if no tables to check if not table_list: raise UtilError("No tables to check.") if verbosity > 1: print "# Checking indexes..." # Check indexes for each table in the list for table_name in table_list: tbl_options = { 'verbose' : verbosity >= 1, 'get_cols' : False, 'quiet' : verbosity is None or verbosity < 1 } tbl = Table(source, table_name, tbl_options) exists = tbl.exists() if not exists and not skip: raise UtilError("Table %s does not exist. Use --skip " "to skip missing tables." % table_name) if exists: if not tbl.get_indexes(): if verbosity > 1: print "# Table %s is not indexed." % (table_name) else: if show_indexes: tbl.print_indexes(index_format) # Show if table has primary key if not tbl.has_primary_key(): if verbosity > 1: print "# Table %s does not contain a PRIMARY key." tbl.check_indexes(show_drops) # Show best and/or worst indexes if stats: if first_indexes is not None: tbl.show_special_indexes(index_format, first_indexes, True) if last_indexes is not None: tbl.show_special_indexes(index_format, last_indexes) if verbosity > 1: print "#" if verbosity > 1: print "# ...done."
def check_index(src_val, table_args, options):
    """Check for duplicate or redundant indexes for one or more tables

    This method will examine the indexes for one or more tables and
    identify any indexes that are potential duplicates or redundant. It
    prints the equivalent DROP statements if selected.

    src_val[in]        a dictionary containing connection information for
                       the source including: (user, password, host, port,
                       socket)
    table_args[in]     list of tables in the form 'db.table' or 'db'
    options[in]        dictionary of options to include:
                         show-drops     : show drop statements for dupe
                                          indexes
                         skip           : skip non-existent tables
                         verbosity      : print extra information
                         show-indexes   : show all indexes for each table
                         index-format   : index format = sql, table, tab, csv
                         worst          : show worst performing indexes
                         best           : show best performing indexes
                         report-indexes : reports tables without PK or UK

    Returns bool True = success, raises UtilError if error
    """
    # Get options
    show_drops = options.get("show-drops", False)
    skip = options.get("skip", False)
    verbosity = options.get("verbosity", False)
    show_indexes = options.get("show-indexes", False)
    index_format = options.get("index-format", False)
    stats = options.get("stats", False)
    first_indexes = options.get("best", None)
    last_indexes = options.get("worst", None)
    report_indexes = options.get("report-indexes", False)

    # Try to connect to the MySQL database server.
    conn_options = {
        'quiet': verbosity == 1,
        'version': "5.0.0",
    }
    servers = connect_servers(src_val, None, conn_options)
    source = servers[0]

    db_list = []     # list of databases
    table_list = []  # list of all tables to process

    # Build a list of objects to process
    # 1. start with db_list if no objects present on command line
    # 2. process command line options.
    # 3. loop through database list and add all tables
    # 4. check indexes

    # Get sql_mode value set on servers
    sql_mode = source.select_variable("SQL_MODE")

    # Perform the options check here. Loop through objects presented.
    for obj in table_args:
        m_obj = parse_object_name(obj, sql_mode)
        # Check if a valid database/table name is specified.
        if m_obj[0] is None:
            raise UtilError(
                PARSE_ERR_OBJ_NAME_FORMAT.format(
                    obj_name=obj, option="the database/table arguments"))
        else:
            db_name, obj_name = m_obj
            if obj_name:
                # Table specified
                table_list.append(obj)
            # Else we are operating on a specific database.
            else:
                # Remove backtick quotes.
                db_name = remove_backtick_quoting(db_name, sql_mode) \
                    if is_quoted_with_backticks(db_name, sql_mode) \
                    else db_name
                db_list.append(db_name)

    # Loop through database list adding tables
    for db in db_list:
        db_source = Database(source, db)
        db_source.init()
        tables = db_source.get_db_objects("TABLE")
        if not tables and verbosity >= 1:
            print("# Warning: database %s does not exist. Skipping." % (db))
        for table in tables:
            # Re-quote both parts so downstream code gets a fully
            # qualified, backtick-quoted name.
            table_list.append("{0}.{1}".format(
                quote_with_backticks(db, sql_mode),
                quote_with_backticks(table[0], sql_mode)))

    # Fail if no tables to check
    if not table_list:
        raise UtilError("No tables to check.")

    if verbosity > 1:
        print("# Checking indexes...")
    # Check indexes for each table in the list
    # pylint: disable=R0101
    for table_name in table_list:
        tbl_options = {
            'verbose': verbosity >= 1,
            'get_cols': False,
            'quiet': verbosity is None or verbosity < 1
        }
        tbl = Table(source, table_name, tbl_options)
        exists = tbl.exists()
        if not exists and not skip:
            raise UtilError("Table %s does not exist. Use --skip "
                            "to skip missing tables." % table_name)
        if exists:
            if not tbl.get_indexes():
                if verbosity > 1 or report_indexes:
                    print("# Table %s is not indexed." % (table_name))
            else:
                if show_indexes:
                    tbl.print_indexes(index_format, verbosity)
                # Show if table has primary key
                if verbosity > 1 or report_indexes:
                    if not tbl.has_primary_key():
                        if not tbl.has_unique_key():
                            print("# Table {0} does not contain neither a "
                                  "PRIMARY nor UNIQUE key.".format(
                                      table_name))
                        else:
                            print("# Table {0} does not contain a PRIMARY key."
                                  "".format(table_name))
                tbl.check_indexes(show_drops)

            # Show best and/or worst indexes
            if stats:
                if first_indexes is not None:
                    tbl.show_special_indexes(index_format, first_indexes,
                                             True)
                if last_indexes is not None:
                    tbl.show_special_indexes(index_format, last_indexes)

        if verbosity > 1:
            print("#")
    if verbosity > 1:
        print("# ...done.")
def _export_table_data(source_srv, table, output_file, options):
    """Export the table data.

    This private method retrieves the data for the specified table in SQL
    format (e.g., INSERT statements) or in a tabular form (GRID, TAB, CSV,
    VERTICAL) to the specified output file or a separated file, according
    to the defined options.

    source_srv[in]  Server instance or dictionary with connection values.
    table[in]       Table to export, tuple with database name and table
                    name.
    output_file[in] Output file to store the export data.
    options[in]     Dictionary containing the options for the export:
                    (skip_tables, skip_views, skip_triggers, skip_procs,
                    skip_funcs, skip_events, skip_grants, skip_create,
                    skip_data, no_header, display, format, file_per_tbl,
                    and debug).

    return a filename if a temporary file is created to store the output
    result (used for multiprocessing) otherwise None.
    """
    frmt = options.get("format", "sql")
    no_headers = options.get("no_headers", True)
    single = options.get("single", False)
    skip_blobs = options.get("skip_blobs", False)
    quiet = options.get("quiet", False)
    file_per_table = options.get("file_per_tbl", False)

    # Handle source server instance or server connection values.
    # Note: For multiprocessing the use of connection values instead of a
    # server instance is required to avoid internal errors.
    if isinstance(source_srv, Server):
        source = source_srv
    else:
        # Get source server instance from connection values.
        conn_options = {
            'quiet': True,  # Avoid repeating output for multiprocessing.
            'version': "5.1.30",
        }
        servers = connect_servers(source_srv, None, conn_options)
        source = servers[0]

    # Handle qualified table name (with backtick quotes).
    db_name = table[0]
    tbl_name = "{0}.{1}".format(db_name, table[1])
    q_db_name = quote_with_backticks(db_name)
    q_tbl_name = "{0}.{1}".format(q_db_name,
                                  quote_with_backticks(table[1]))

    # Determine output file to store exported table data.
    if file_per_table:
        # Store result of table export to a separated file.
        file_name = _generate_tbl_filename(tbl_name, frmt)
        outfile = open(file_name, "w+")
        tempfile_used = False
    else:
        if output_file:
            # Output file to store result is defined.
            outfile = output_file
            tempfile_used = False
        else:
            # Store result in a temporary file (merged later).
            # Used by multiprocess export.
            tempfile_used = True
            outfile = tempfile.NamedTemporaryFile(delete=False)

    message = "# Data for table {0}:".format(q_tbl_name)
    outfile.write("{0}\n".format(message))

    tbl_options = {
        'verbose': False,
        'get_cols': True,
        'quiet': quiet
    }
    cur_table = Table(source, q_tbl_name, tbl_options)
    # Retrieval mode -1 fetches all rows in one pass (single-statement
    # output); mode 1 streams row sets.
    if single and frmt not in ("sql", "grid", "vertical"):
        retrieval_mode = -1
        first = True
    else:
        retrieval_mode = 1
        first = False

    # Find if we have some UNIQUE NOT NULL column indexes.
    unique_indexes = len(cur_table.get_not_null_unique_indexes())

    # If all columns are BLOBS or there aren't any UNIQUE NOT NULL indexes
    # then rows won't be correctly copied using the update statement,
    # so we must warn the user.
    if (not skip_blobs and frmt == "sql" and
            (cur_table.blob_columns == len(cur_table.column_names) or
             (not unique_indexes and cur_table.blob_columns))):
        print("# WARNING: Table {0}.{1} contains only BLOB and TEXT "
              "fields. Rows will be generated with separate INSERT "
              "statements.".format(cur_table.db_name,
                                   cur_table.tbl_name))

    for data_rows in cur_table.retrieve_rows(retrieval_mode):
        _export_row(data_rows, cur_table, frmt, single, skip_blobs,
                    first, no_headers, outfile)
        if first:
            # Only the first row set emits headers.
            first = False

    if file_per_table:
        outfile.close()

    return outfile.name if tempfile_used else None
def export_data(source, src_val, db_list, options):
    """Produce data for the tables in a database.

    This method retrieves the data for each table in the databases listed
    in the form of BULK INSERT (SQL) statements or in a tabular form to
    the file specified. The valid values for the format parameter are
    SQL, CSV, TSV, VERTICAL, or GRID.

    source[in]         Server instance
    src_val[in]        a dictionary containing connection information for
                       the source including: (user, password, host, port,
                       socket)
    db_list[in]        list of database names to export
    options[in]        a dictionary containing the options for the copy:
                       (skip_tables, skip_views, skip_triggers, skip_procs,
                       skip_funcs, skip_events, skip_grants, skip_create,
                       skip_data, no_header, display, format, file_per_tbl,
                       and debug)

    Returns bool True = success, False = error
    """
    # Imported locally to avoid a circular import at module load time.
    from mysql.utilities.common.database import Database
    from mysql.utilities.common.table import Table

    # NOTE(review): "format" shadows the builtin of the same name; it is
    # only used locally here.
    format = options.get("format", "sql")
    no_headers = options.get("no_headers", True)
    # NOTE(review): column_type is read but not referenced below.
    column_type = options.get("display", "brief")
    single = options.get("single", False)
    skip_blobs = options.get("skip_blobs", False)
    quiet = options.get("quiet", False)
    file_per_table = options.get("file_per_tbl", False)
    skip_views = options.get("skip_views", False)
    skip_procs = options.get("skip_procs", False)
    skip_funcs = options.get("skip_funcs", False)
    skip_events = options.get("skip_events", False)
    skip_grants = options.get("skip_grants", False)

    # With --all, extend db_list with every database on the server.
    if options.get("all", False):
        rows = source.get_all_databases()
        for row in rows:
            if row[0] not in db_list:
                db_list.append(row[0])

    # Check if database exists and user permissions on source for all
    # databases
    table_lock_list = []  # NOTE(review): populated nowhere in this function
    table_list = []
    for db_name in db_list:
        source_db = Database(source, db_name)

        # Make a dictionary of the options
        access_options = {
            'skip_views'  : skip_views,
            'skip_procs'  : skip_procs,
            'skip_funcs'  : skip_funcs,
            'skip_grants' : skip_grants,
            'skip_events' : skip_events,
        }

        # Error if source database does not exist
        if not source_db.exists():
            raise UtilDBError("Source database does not exist - %s" %
                              db_name, -1, db_name)

        source_db.check_read_access(src_val["user"], src_val["host"],
                                    access_options)

        # Build table list
        tables = source_db.get_db_objects("TABLE")
        for table in tables:
            table_list.append((db_name, table[0]))

    old_db = ""
    for table in table_list:
        db_name = table[0]
        tbl_name = "%s.%s" % (db_name, table[1])
        # quote database and table name with backticks
        q_db_name = quote_with_backticks(db_name)
        q_tbl_name = "%s.%s" % (q_db_name,
                                quote_with_backticks(table[1]))
        # Print the per-database header once per database.
        if not quiet and old_db != db_name:
            old_db = db_name
            if format == "sql":
                print "USE %s;" % q_db_name
            print "# Exporting data from %s" % db_name
            if file_per_table:
                print "# Writing table data to files."

        tbl_options = {
            'verbose'  : False,
            'get_cols' : True,
            'quiet'    : quiet
        }
        cur_table = Table(source, q_tbl_name, tbl_options)
        # Retrieval mode -1 fetches all rows in one pass (single-statement
        # output); mode 1 streams row sets.
        if single and format not in ("sql", "grid", "vertical"):
            retrieval_mode = -1
            first = True
        else:
            retrieval_mode = 1
            first = False

        message = "# Data for table %s: " % q_tbl_name

        # switch for writing to files
        if file_per_table:
            if format == 'sql':
                file_name = tbl_name + ".sql"
            else:
                file_name = tbl_name + ".%s" % format.lower()
            outfile = open(file_name, "w")
            outfile.write(message + "\n")
        else:
            outfile = None
            print message

        for data_rows in cur_table.retrieve_rows(retrieval_mode):
            _export_row(data_rows, cur_table, format, single,
                        skip_blobs, first, no_headers, outfile)
            if first:
                # Only the first row set emits headers.
                first = False

        if file_per_table:
            outfile.close()

    if not quiet:
        print "#...done."

    return True
def export_data(source, src_val, db_list, options):
    """Produce data for the tables in a database.

    This method retrieves the data for each table in the databases listed
    in the form of BULK INSERT (SQL) statements or in a tabular form to
    the file specified. The valid values for the format parameter are
    SQL, CSV, TSV, VERTICAL, or GRID.

    source[in]         Server instance
    src_val[in]        a dictionary containing connection information for
                       the source including: (user, password, host, port,
                       socket)
    db_list[in]        list of database names to export
    options[in]        a dictionary containing the options for the copy:
                       (skip_tables, skip_views, skip_triggers, skip_procs,
                       skip_funcs, skip_events, skip_grants, skip_create,
                       skip_data, no_header, display, format, file_per_tbl,
                       and debug)

    Returns bool True = success, False = error
    """
    # Imported locally to avoid a circular import at module load time.
    from mysql.utilities.common.database import Database
    from mysql.utilities.common.table import Table

    # NOTE(review): "format" shadows the builtin of the same name; it is
    # only used locally here.
    format = options.get("format", "sql")
    no_headers = options.get("no_headers", True)
    # NOTE(review): column_type is read but not referenced below.
    column_type = options.get("display", "brief")
    single = options.get("single", False)
    skip_blobs = options.get("skip_blobs", False)
    quiet = options.get("quiet", False)
    file_per_table = options.get("file_per_tbl", False)
    skip_views = options.get("skip_views", False)
    skip_procs = options.get("skip_procs", False)
    skip_funcs = options.get("skip_funcs", False)
    skip_events = options.get("skip_events", False)
    skip_grants = options.get("skip_grants", False)

    # With --all, extend db_list with every database on the server.
    if options.get("all", False):
        rows = source.get_all_databases()
        for row in rows:
            if row[0] not in db_list:
                db_list.append(row[0])

    # Check if database exists and user permissions on source for all
    # databases
    table_lock_list = []  # NOTE(review): populated nowhere in this function
    table_list = []
    for db_name in db_list:
        source_db = Database(source, db_name)

        # Make a dictionary of the options
        access_options = {
            'skip_views': skip_views,
            'skip_procs': skip_procs,
            'skip_funcs': skip_funcs,
            'skip_grants': skip_grants,
            'skip_events': skip_events,
        }

        # Error if source database does not exist
        if not source_db.exists():
            raise UtilDBError("Source database does not exist - %s" %
                              db_name, -1, db_name)

        source_db.check_read_access(src_val["user"], src_val["host"],
                                    access_options)

        # Build table list
        tables = source_db.get_db_objects("TABLE")
        for table in tables:
            table_list.append((db_name, table[0]))

    old_db = ""
    for table in table_list:
        db_name = table[0]
        tbl_name = "%s.%s" % (db_name, table[1])
        # quote database and table name with backticks
        q_db_name = quote_with_backticks(db_name)
        q_tbl_name = "%s.%s" % (q_db_name,
                                quote_with_backticks(table[1]))
        # Print the per-database header once per database.
        if not quiet and old_db != db_name:
            old_db = db_name
            if format == "sql":
                print "USE %s;" % q_db_name
            print "# Exporting data from %s" % db_name
            if file_per_table:
                print "# Writing table data to files."

        tbl_options = {'verbose': False, 'get_cols': True, 'quiet': quiet}
        cur_table = Table(source, q_tbl_name, tbl_options)
        # Retrieval mode -1 fetches all rows in one pass (single-statement
        # output); mode 1 streams row sets.
        if single and format not in ("sql", "grid", "vertical"):
            retrieval_mode = -1
            first = True
        else:
            retrieval_mode = 1
            first = False

        message = "# Data for table %s: " % q_tbl_name

        # switch for writing to files
        if file_per_table:
            if format == 'sql':
                file_name = tbl_name + ".sql"
            else:
                file_name = tbl_name + ".%s" % format.lower()
            outfile = open(file_name, "w")
            outfile.write(message + "\n")
        else:
            outfile = None
            print message

        for data_rows in cur_table.retrieve_rows(retrieval_mode):
            _export_row(data_rows, cur_table, format, single,
                        skip_blobs, first, no_headers, outfile)
            if first:
                # Only the first row set emits headers.
                first = False

        if file_per_table:
            outfile.close()

    if not quiet:
        print "#...done."

    return True
def check_index(src_val, table_args, options): """Check for duplicate or redundant indexes for one or more tables This method will examine the indexes for one or more tables and identify any indexes that are potential duplicates or redundant. It prints the equivalent DROP statements if selected. src_val[in] a dictionary containing connection information for the source including: (user, password, host, port, socket) table_args[in] list of tables in the form 'db.table' or 'db' options[in] dictionary of options to include: show-drops : show drop statements for dupe indexes skip : skip non-existant tables verbosity : print extra information show-indexes : show all indexes for each table index-format : index format = sql, table, tab, csv worst : show worst performing indexes best : show best performing indexes Returns bool True = success, raises UtilError if error """ # Get options show_drops = options.get("show-drops", False) skip = options.get("skip", False) verbosity = options.get("verbosity", False) show_indexes = options.get("show-indexes", False) index_format = options.get("index-format", False) stats = options.get("stats", False) first_indexes = options.get("best", None) last_indexes = options.get("worst", None) from mysql.utilities.common.server import connect_servers from mysql.utilities.common.database import Database from mysql.utilities.common.table import Table # Try to connect to the MySQL database server. conn_options = { 'quiet': verbosity == 1, 'version': "5.0.0", } servers = connect_servers(src_val, None, conn_options) source = servers[0] db_list = [] # list of databases table_list = [] # list of all tables to process # Build a list of objects to process # 1. start with db_list if no obects present on command line # 2. process command line options. # 3. loop through database list and add all tables # 4. check indexes # Perform the options check here. Loop through objects presented. for obj in table_args: # If a . 
appears, we are operating on a specific table idx = obj.count(".") if (idx == 1): table_list.append(obj) # Else we are operating on a specific database. else: db_list.append(obj) # Loop through database list adding tables for db in db_list: db_source = Database(source, db) db_source.init() tables = db_source.get_db_objects("TABLE") if not tables and verbosity >= 1: print "# Warning: database %s does not exist. Skipping." % (db) for table in tables: table_list.append(db + "." + table[0]) # Fail if no tables to check if not table_list: raise UtilError("No tables to check.") if verbosity > 1: print "# Checking indexes..." # Check indexes for each table in the list for table_name in table_list: tbl_options = { 'verbose': verbosity >= 1, 'get_cols': False, 'quiet': verbosity is None or verbosity < 1 } tbl = Table(source, table_name, tbl_options) exists = tbl.exists() if not exists and not skip: raise UtilError("Table %s does not exist. Use --skip " "to skip missing tables." % table_name) if exists: if not tbl.get_indexes(): if verbosity > 1: print "# Table %s is not indexed." % (table_name) else: if show_indexes: tbl.print_indexes(index_format) # Show if table has primary key if not tbl.has_primary_key(): if verbosity > 1: print "# Table %s does not contain a PRIMARY key." tbl.check_indexes(show_drops) # Show best and/or worst indexes if stats: if first_indexes is not None: tbl.show_special_indexes(index_format, first_indexes, True) if last_indexes is not None: tbl.show_special_indexes(index_format, last_indexes) if verbosity > 1: print "#" if verbosity > 1: print "# ...done."
def check_index(src_val, table_args, options):
    """Check for duplicate or redundant indexes for one or more tables

    This method will examine the indexes for one or more tables and
    identify any indexes that are potential duplicates or redundant. It
    prints the equivalent DROP statements if selected.

    src_val[in]        a dictionary containing connection information for
                       the source including: (user, password, host, port,
                       socket)
    table_args[in]     list of tables in the form 'db.table' or 'db'
    options[in]        dictionary of options to include:
                         show-drops     : show drop statements for dupe
                                          indexes
                         skip           : skip non-existent tables
                         verbosity      : print extra information
                         show-indexes   : show all indexes for each table
                         index-format   : index format = sql, table, tab, csv
                         worst          : show worst performing indexes
                         best           : show best performing indexes
                         report-indexes : reports tables without PK or UK

    Returns bool True = success, raises UtilError if error
    """
    # Get options
    show_drops = options.get("show-drops", False)
    skip = options.get("skip", False)
    verbosity = options.get("verbosity", False)
    show_indexes = options.get("show-indexes", False)
    index_format = options.get("index-format", False)
    stats = options.get("stats", False)
    first_indexes = options.get("best", None)
    last_indexes = options.get("worst", None)
    report_indexes = options.get("report-indexes", False)

    # Try to connect to the MySQL database server.
    conn_options = {
        'quiet': verbosity == 1,
        'version': "5.0.0",
    }
    servers = connect_servers(src_val, None, conn_options)
    source = servers[0]

    db_list = []     # list of databases
    table_list = []  # list of all tables to process

    # Build a list of objects to process
    # 1. start with db_list if no objects present on command line
    # 2. process command line options.
    # 3. loop through database list and add all tables
    # 4. check indexes

    # Compiled once; matches an optionally backtick-quoted db[.table] name.
    obj_name_regexp = re.compile(REGEXP_QUALIFIED_OBJ_NAME)

    # Perform the options check here. Loop through objects presented.
    for obj in table_args:
        m_obj = obj_name_regexp.match(obj)
        # Check if a valid database/table name is specified.
        if not m_obj:
            raise UtilError(PARSE_ERR_OBJ_NAME_FORMAT.format(
                obj_name=obj, option="the database/table arguments"))
        else:
            db_name, obj_name = m_obj.groups()
            if obj_name:
                # Table specified
                table_list.append(obj)
            # Else we are operating on a specific database.
            else:
                # Remove backtick quotes.
                db_name = remove_backtick_quoting(db_name) \
                    if is_quoted_with_backticks(db_name) else db_name
                db_list.append(db_name)

    # Loop through database list adding tables
    for db in db_list:
        db_source = Database(source, db)
        db_source.init()
        tables = db_source.get_db_objects("TABLE")
        if not tables and verbosity >= 1:
            print "# Warning: database %s does not exist. Skipping." % (db)
        for table in tables:
            # Re-quote both parts so downstream code gets a fully
            # qualified, backtick-quoted name.
            table_list.append("{0}.{1}".format(
                quote_with_backticks(db),
                quote_with_backticks(table[0])))

    # Fail if no tables to check
    if not table_list:
        raise UtilError("No tables to check.")

    if verbosity > 1:
        print "# Checking indexes..."
    # Check indexes for each table in the list
    for table_name in table_list:
        tbl_options = {
            'verbose': verbosity >= 1,
            'get_cols': False,
            'quiet': verbosity is None or verbosity < 1
        }
        tbl = Table(source, table_name, tbl_options)
        exists = tbl.exists()
        if not exists and not skip:
            raise UtilError("Table %s does not exist. Use --skip "
                            "to skip missing tables." % table_name)
        if exists:
            if not tbl.get_indexes():
                if verbosity > 1 or report_indexes:
                    print "# Table %s is not indexed." % (table_name)
            else:
                if show_indexes:
                    tbl.print_indexes(index_format, verbosity)
                # Show if table has primary key
                if verbosity > 1 or report_indexes:
                    if not tbl.has_primary_key():
                        if not tbl.has_unique_key():
                            print("# Table {0} does not contain neither a "
                                  "PRIMARY nor UNIQUE key.".format(table_name))
                        else:
                            print("# Table {0} does not contain a PRIMARY key."
                                  "".format(table_name))
                tbl.check_indexes(show_drops)

            # Show best and/or worst indexes
            if stats:
                if first_indexes is not None:
                    tbl.show_special_indexes(index_format, first_indexes,
                                             True)
                if last_indexes is not None:
                    tbl.show_special_indexes(index_format, last_indexes)

        if verbosity > 1:
            print "#"
    if verbosity > 1:
        print "# ...done."
def check_consistency(server1, server2, table1_name, table2_name,
                      options=None):
    """Check the data consistency of two tables.

    This method performs a comparison of the data in two tables.

    Algorithm:

    This procedure uses a separate compare database containing a table that
    contains an MD5 hash of the concatenated values of a row along with a
    MD5 hash of the concatenation of the primary key, the primary key
    columns, and a grouping column named span.

    The process to calculate differences in table data is as follows:

    0. If binary log on for the client (sql_log_bin = 1), turn it off.

    1. Create the compare database and the compare table for each
       database (db1.table1, db2.table2)

    2. For each table, populate the compare table using an INSERT statement
       that calculates the MD5 hash for the row.

    3. For each table, a summary result is formed by summing the MD5 hash
       values broken into four parts. The MD5 hash is converted to decimal
       for a numerical sum. This summary query also groups the rows in the
       compare table by the span column which is formed from the first 4
       positions of the primary key hash.

    4. The summary tables are compared using set methods to find rows
       (spans) that appear in both tables, those only in table1, and those
       only in table2. A set operation that does not match the rows means
       the summed hash is different therefore meaning one or more rows in
       the span have either a missing row in the other table or the data
       is different. If no differences found, skip to (8).

    5. The span values from the sets that contain rows that are different
       are then compared again using set operations. Those spans that are
       in both sets contain rows that have changed while the set of rows
       in one but not the other (and vice-versa) contain rows that are
       missing.

       Note: it is possible given sufficient density of the table for the
       changed rows span to contain missing rows. This is Ok because the
       output of the difference will still present the data as missing.

    6. The output of (5) that contain the same spans (changed rows) is then
       used to form a difference and this is saved for presentation to the
       user.

    7. The output of (6) that contain missing spans (missing rows) is then
       used to form a formatted list of the results for presentation to the
       user.

    8. The compare databases are destroyed and differences (if any) are
       returned. A return value of None indicates the data is consistent.

    9. Turn binary logging on if turned off in step (0).

    server1[in]       first server Server instance
    server2[in]       second server Server instance
    table1_name[in]   name of first table in form 'db.name'
    table2_name[in]   name of second table in form 'db.name'
    options[in]       dictionary of options for the operation containing
                        'format'        : format for output of missing rows
                        'difftype'      : type of difference to show
                        'toggle_binlog' : if True, disable binary logging
                                          for the duration of the compare

    Returns None = data is consistent
            list of differences - data is not consistent
    """
    from mysql.utilities.common.sql_transform import transform_data
    from mysql.utilities.common.table import Table

    # NOTE(fix): the default used to be a mutable dict ({}), which is
    # shared across calls; normalize a None default instead.
    if options is None:
        options = {}

    # 'fmt' avoids shadowing the builtin 'format'.
    fmt = options.get('format', 'GRID')
    difftype = options.get('difftype', 'unified')

    # NOTE(fix): the original default was the *string* 'False', which is
    # truthy, so binary logging was toggled even when the option was not
    # supplied. Use a real boolean default so absence means "do not toggle".
    if options.get('toggle_binlog', False):
        # Step 0: disable binary logging on each server if it is enabled,
        # remembering which ones to re-enable at the end.
        binlog_server1 = server1.binlog_enabled()
        if binlog_server1:
            server1.toggle_binlog("DISABLE")
        binlog_server2 = server2.binlog_enabled()
        if binlog_server2:
            server2.toggle_binlog("DISABLE")
    else:
        # Set to false to skip after actions to turn binlog back on.
        binlog_server1 = False
        binlog_server2 = False

    data_diffs = None

    table1 = Table(server1, table1_name)
    table2 = Table(server2, table2_name)

    # Setup the comparative tables and calculate the hashes (steps 1-2).
    pri_idx_str1, pri_idx_str2 = _setup_compare(table1, table2)

    # Populate the compare tables and retrieve rows from each table (step 3).
    tbl1_hash = _make_sum_rows(table1, pri_idx_str1)
    tbl2_hash = _make_sum_rows(table2, pri_idx_str2)

    # Compare results (step 4).
    in_both, in1_not2, in2_not1 = get_common_lists(tbl1_hash, tbl2_hash)

    # If mismatch found, go back to compare table and retrieve grouping.
    if len(in1_not2) != 0 or len(in2_not1) != 0:
        table1_diffs = []
        table2_diffs = []
        data_diffs = []

        # Get keys for diffs on table1
        for row in in1_not2:
            table1_diffs.append(row[0])

        # Get keys for diffs on table2
        for row in in2_not1:
            table2_diffs.append(row[0])

        # Find changed and missing rows (step 5).
        changed_rows, extra1, extra2 = get_common_lists(
            table1_diffs, table2_diffs)

        # Step 6: spans present in both sets hold rows that changed.
        if len(changed_rows) > 0:
            data_diffs.append("# Data differences found among rows:")
            tbl1_rows = _get_rows_span(table1, changed_rows)
            tbl2_rows = _get_rows_span(table2, changed_rows)
            if difftype == 'sql':
                data_diffs.extend(
                    transform_data(table1, table2, "UPDATE",
                                   (tbl1_rows, tbl2_rows)))
            else:
                rows1 = _get_formatted_rows(tbl1_rows, table1, fmt)
                rows2 = _get_formatted_rows(tbl2_rows, table2, fmt)
                diff_str = _get_diff(rows1, rows2, table1_name, table2_name,
                                     difftype)
                if len(diff_str) > 0:
                    data_diffs.extend(diff_str)

        # Step 7: spans only in one table hold missing rows.
        if len(extra1) > 0:
            rows = _get_rows_span(table1, extra1)
            if difftype == 'sql':
                data_diffs.extend(
                    transform_data(table1, table2, "DELETE", rows))
            else:
                data_diffs.append("\n# Rows in {0} not in {1}".format(
                    table1_name, table2_name))
                res = _get_formatted_rows(rows, table1, fmt)
                data_diffs.extend(res)

        if len(extra2) > 0:
            rows = _get_rows_span(table2, extra2)
            if difftype == 'sql':
                data_diffs.extend(
                    transform_data(table1, table2, "INSERT", rows))
            else:
                data_diffs.append("\n# Rows in {0} not in {1}".format(
                    table2_name, table1_name))
                res = _get_formatted_rows(rows, table2, fmt)
                data_diffs.extend(res)

    # Step 9: restore binary logging on any server where it was disabled.
    if binlog_server1:
        server1.toggle_binlog("ENABLE")
    if binlog_server2:
        server2.toggle_binlog("ENABLE")

    return data_diffs
def _export_table_data(source_srv, table, output_file, options):
    """Export the table data.

    This private method retrieves the data for the specified table in SQL
    format (e.g., INSERT statements) or in a tabular form (GRID, TAB, CSV,
    VERTICAL) to the specified output file or a separated file, according
    to the defined options.

    source_srv[in]      Server instance or dictionary with connection values.
    table[in]           Table to export, tuple with database name and table
                        name.
    output_file[in]     Output file to store the export data.
    options[in]         Dictionary containing the options for the export:
                        (skip_tables, skip_views, skip_triggers, skip_procs,
                        skip_funcs, skip_events, skip_grants, skip_create,
                        skip_data, no_header, display, format, file_per_tbl,
                        and debug).

    return a filename if a temporary file is created to store the output
           result (used for multiprocessing) otherwise None.
    """
    frmt = options.get("format", "sql")
    no_headers = options.get("no_headers", True)
    single = options.get("single", False)
    skip_blobs = options.get("skip_blobs", False)
    quiet = options.get("quiet", False)
    file_per_table = options.get("file_per_tbl", False)

    # Handle source server instance or server connection values.
    # Note: For multiprocessing the use of connection values instead of a
    # server instance is required to avoid internal errors.
    if isinstance(source_srv, Server):
        source = source_srv
    else:
        # Get source server instance from connection values.
        conn_options = {
            'quiet': True,  # Avoid repeating output for multiprocessing.
            'version': "5.1.30",
        }
        servers = connect_servers(source_srv, None, conn_options)
        source = servers[0]

    # Handle qualified table name (with backtick quotes).
    db_name = table[0]
    tbl_name = "{0}.{1}".format(db_name, table[1])
    q_db_name = quote_with_backticks(db_name)
    q_tbl_name = "{0}.{1}".format(q_db_name, quote_with_backticks(table[1]))

    # Determine output file to store exported table data.
    if file_per_table:
        # Store result of table export to a separated file.
        file_name = _generate_tbl_filename(tbl_name, frmt)
        outfile = open(file_name, "w+")
        tempfile_used = False
    elif output_file:
        # Output file to store result is defined (owned by the caller,
        # so it must not be closed here).
        outfile = output_file
        tempfile_used = False
    else:
        # Store result in a temporary file (merged later).
        # Used by multiprocess export.
        tempfile_used = True
        outfile = tempfile.NamedTemporaryFile(delete=False)

    message = "# Data for table {0}:".format(q_tbl_name)
    outfile.write("{0}\n".format(message))

    tbl_options = {'verbose': False, 'get_cols': True, 'quiet': quiet}
    cur_table = Table(source, q_tbl_name, tbl_options)
    # Non-sql/grid/vertical single-statement formats fetch all rows at
    # once (-1); otherwise rows are streamed one result set at a time.
    if single and frmt not in ("sql", "grid", "vertical"):
        retrieval_mode = -1
        first = True
    else:
        retrieval_mode = 1
        first = False
    for data_rows in cur_table.retrieve_rows(retrieval_mode):
        _export_row(data_rows, cur_table, frmt, single, skip_blobs,
                    first, no_headers, outfile)
        if first:
            first = False

    if file_per_table:
        # Per-table file: close it; nothing is merged later.
        outfile.close()
        return None
    if tempfile_used:
        # NOTE(fix): close the temporary file before returning its name;
        # the original left the handle open, leaking one file descriptor
        # per exported table in multiprocessing workers. The file itself
        # persists (delete=False) for the caller to merge.
        temp_name = outfile.name
        outfile.close()
        return temp_name
    return None