def check_schema(zk_prefix, tablename, tbl_hash):
    """Verify that a table across an entire tier has the expected schema

    Args:
    zk_prefix - The prefix of the key in the DS ZK node
    tablename - the name of the table to verify
    tbl_hash - the md5sum of the desired CREATE TABLE for the table

    Returns:
    A dictionary with keys that are the hash of the CREATE TABLE statement
    and the values are sets of hostname:port followed by a space and then
    the db on which the incorrect schema was found.
    """
    incorrect = dict()
    zk = host_utils.MysqlZookeeper()
    config = zk.get_ds_mysql_config()
    for replica_set, entry in config.iteritems():
        if not replica_set.startswith(zk_prefix):
            continue
        master = host_utils.HostAddr(':'.join((entry['master']['host'],
                                               str(entry['master']['port']))))
        slave = host_utils.HostAddr(':'.join((entry['slave']['host'],
                                              str(entry['slave']['port']))))
        # Merge the per-instance results into one mapping of
        # schema hash -> set of "hostname:port db" strings. The original
        # code had this merge loop duplicated for master and slave.
        for hashes in (check_instance_table(master, tablename, tbl_hash),
                       check_instance_table(slave, tablename, tbl_hash)):
            for tbl_def, locations in hashes.iteritems():
                if tbl_def not in incorrect:
                    incorrect[tbl_def] = set()
                incorrect[tbl_def] = incorrect[tbl_def].union(locations)
    return incorrect
def main():
    """Entry point: parse CLI options and restore a MySQL backup."""
    parser = argparse.ArgumentParser(
        description='Utility to download and restore MySQL xbstream backups')
    parser.add_argument('-b', '--backup_type',
                        help='Type of backup to restore. Default: xtrabackup',
                        default=backup.BACKUP_TYPE_XBSTREAM,
                        choices=(backup.BACKUP_TYPE_LOGICAL,
                                 backup.BACKUP_TYPE_XBSTREAM))
    parser.add_argument('-s', '--source_instance',
                        help=('Which instances backups to restore. Default is '
                              'a best guess based on the hostname.'),
                        default=None)
    parser.add_argument('-d', '--date',
                        help='attempt to restore from a specific date')
    parser.add_argument('-p', '--destination_port',
                        help='Port on localhost on to restore. Default 3306.',
                        default='3306')
    parser.add_argument('--no_repl',
                        help='Setup replication but do not run START SLAVE',
                        default='REQ',
                        action='store_const',
                        const='SKIP')
    parser.add_argument('--add_to_zk',
                        help=('By default the instance will not be added to '
                              'zk. This option will attempt to add the '
                              'instance to zk.'),
                        default='SKIP',
                        action='store_const',
                        const='REQ')
    parser.add_argument('--skip_production_check',
                        help=('DANGEROUS! Skip check of whether the instance '
                              'to be built is already in use'),
                        default=False,
                        action='store_true')
    args = parser.parse_args()

    # Source may be unset, in which case restore_instance will guess it.
    source = (host_utils.HostAddr(args.source_instance)
              if args.source_instance else None)
    destination = host_utils.HostAddr(
        ':'.join((host_utils.HOSTNAME, args.destination_port)))

    restore_instance(backup_type=args.backup_type,
                     restore_source=source,
                     destination=destination,
                     no_repl=args.no_repl,
                     date=args.date,
                     add_to_zk=args.add_to_zk,
                     skip_production_check=args.skip_production_check)
def main(): parser = argparse.ArgumentParser(description='MySQL replication checker') parser.add_argument('replica', help='Replica MySQL instance to sanity check ' 'hostname[:port]') parser.add_argument('-w', '--watch_for_catch_up', help='Watch replication for catch up ', default=False, action='store_true') args = parser.parse_args() slave_hostaddr = host_utils.HostAddr(args.replica) if args.watch_for_catch_up: mysql_lib.wait_for_catch_up(slave_hostaddr) else: ret = mysql_lib.calc_slave_lag(slave_hostaddr) print "Heartbeat_seconds_behind: {sbm}".format(sbm=ret['sbm']) print "Slave_IO_Running: {Slave_IO_Running} ".format( Slave_IO_Running=ret['ss']['Slave_IO_Running']) print "IO_lag_bytes: {io_bytes}".format(io_bytes=ret['io_bytes']) print "IO_lag_binlogs: {io_binlogs}".format( io_binlogs=ret['io_binlogs']) print "Slave_SQL_Running: {Slave_IO_Running} ".format( Slave_IO_Running=ret['ss']['Slave_SQL_Running']) print "SQL_lag_bytes: {sql_bytes}".format(sql_bytes=ret['sql_bytes']) print "SQL_lag_binlogs: {sql_binlogs}".format( sql_binlogs=ret['sql_binlogs'])
def main():
    """Entry point for the CSV backup utility.

    Honors the CSV_BACKUP_SKIP_FILE kill switch, then backs up the local
    MySQL instance to CSV (optionally one db/table, optionally re-uploading).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--db',
                        default=None,
                        help='DB to export, default is all databases.')
    parser.add_argument('--force_table',
                        default=None,
                        help='Table to export, default is all tables.')
    parser.add_argument('--force_reupload',
                        default=False,
                        action='store_true',
                        # Typo fix: was 'everyting'
                        help='Ignore existing uploads, reupload everything')
    parser.add_argument('--loglevel',
                        default='INFO',
                        help='Change logging verbosity',
                        choices=set(['INFO', 'DEBUG']))
    parser.add_argument('--dev_bucket',
                        default=False,
                        action='store_true',
                        help='Use the dev bucket, useful for testing')
    args = parser.parse_args()
    logging.basicConfig(level=getattr(logging, args.loglevel.upper(), None))

    # Nope, don't even start.
    if os.path.isfile(backup.CSV_BACKUP_SKIP_FILE):
        log.info('Found {}. Skipping CSV backup '
                 'run.'.format(backup.CSV_BACKUP_SKIP_FILE))
        return

    # If we ever want to run multi instance, this wil need to be updated
    backup_obj = mysql_backup_csv(host_utils.HostAddr(host_utils.HOSTNAME),
                                  args.db, args.force_table,
                                  args.force_reupload, args.dev_bucket)
    backup_obj.backup_instance()
def collectTableStats(db):
    """ Collect table stats

    Groups schemas by namespace, emits per-table and per-schema metrics for
    each group, then resets the server-side statistics counters.

    Args:
    db - a db object
    """
    # First we are going to pull stats aggregated by schema
    # and namespace, if applicable
    # NOTE: removed a dead 'global collection_time, last_collection_time'
    # statement - neither name was read or assigned in this function.
    instance = host_utils.HostAddr(':'.join((socket.gethostname(),
                                             db.port)))
    namespace_dbs_map = dict()
    non_namespace_dbs = set()
    for schema in mysql_lib.get_dbs(instance):
        namespace = get_namespace_from_schema(schema)
        if namespace:
            if namespace not in namespace_dbs_map:
                namespace_dbs_map[namespace] = set()
            namespace_dbs_map[namespace].add(schema)
        else:
            non_namespace_dbs.add(schema)

    for namespace in namespace_dbs_map:
        for row in get_tablestats(db, namespace_dbs_map[namespace]):
            printmetrics_tablestat(db, row, namespace)
    if non_namespace_dbs:
        for row in get_tablestats(db, non_namespace_dbs):
            printmetrics_tablestat(db, row)

    # next we want table stats aggregated by table and namespace.
    for namespace in namespace_dbs_map:
        for row in get_schemastats(db, namespace_dbs_map[namespace]):
            printmetrics_schemastats(db, row, namespace)
    if non_namespace_dbs:
        for row in get_schemastats(db, non_namespace_dbs):
            printmetrics_schemastats(db, row)

    # Reset counters so the next collection interval starts fresh.
    db.query("FLUSH NO_WRITE_TO_BINLOG TABLE_STATISTICS")
def determine_replacement_role(conn, instance_id):
    """ Try to determine the role an instance should be placed into

    Args:
    conn - A connection to the reporting server
    instance_id - The id of the replacement instance

    Returns:
    The replication role which should be either 'slave' or 'dr_slave'

    Raises:
    Exception - if the replaced host cannot be found, if it was a master,
                or if its role cannot be determined.
    """
    zk = host_utils.MysqlZookeeper()
    cursor = conn.cursor()
    # Look up which host this instance was brought up to replace.
    sql = ("SELECT old_host "
           "FROM mysqlops.host_replacement_log "
           "WHERE new_instance = %(new_instance)s ")
    params = {'new_instance': instance_id}
    cursor.execute(sql, params)
    log.info(cursor._executed)
    result = cursor.fetchone()
    if result is None:
        raise Exception('Could not determine replacement host')
    old_host = host_utils.HostAddr(result['old_host'])
    log.info('Host to be replaced is {old_host}'
             ''.format(old_host=old_host.hostname))

    (_, repl_type) = zk.get_replica_set_from_instance(old_host)

    if repl_type == host_utils.REPLICA_ROLE_MASTER:
        # Typo fix: was 'Corwardly'
        raise Exception('Cowardly refusing to replace a master!')
    elif repl_type is None:
        raise Exception('Could not determine replacement role')
    else:
        return repl_type
def main(): description = ("MySQL orpahned shard detector\n\n" "This utility will attempt to find orphaned databases " "across sharddb and modsharddb") parser = argparse.ArgumentParser( description=description, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument('-i', '--instance', help='Check a single instance rather than all', default=False) args = parser.parse_args() if args.instance: instance = host_utils.HostAddr(args.instance) else: instance = False orphaned, orphaned_but_used, missing = find_shard_mismatches(instance) for orphan in orphaned: print 'Orphan dbs {host} {dbs}'.format(host=orphan, dbs=','.join(orphaned[orphan])) for orphan in orphaned_but_used: print 'Orphan but still used dbs {host} {dbs}'.format( host=orphan, dbs=','.join(orphaned_but_used[orphan])) for orphan in missing: print 'Missing dbs {host} {dbs}'.format(host=orphan, dbs=','.join(missing[orphan])) if not orphaned and not orphaned_but_used and not missing: print "No problems found"
def main():
    """Exit with a status describing whether the *other* slave of this
    instance's replica set is running ETL (csv backups)."""
    parser = argparse.ArgumentParser(description="Is ETL running on a "
                                                 "different instance?")
    parser.add_argument('instance',
                        nargs='?',
                        help="Instance to inspect, default is localhost:3306",
                        default=''.join((host_utils.HOSTNAME, ':3306')))
    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)
    zk = host_utils.MysqlZookeeper()
    (replica_set, replica_type) = zk.get_replica_set_from_instance(instance)

    # The "other" slave is the dr_slave if we are the slave, and vice versa.
    if replica_type == host_utils.REPLICA_ROLE_DR_SLAVE:
        inst = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_SLAVE)
    elif replica_type == host_utils.REPLICA_ROLE_SLAVE:
        inst = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_DR_SLAVE)
    else:
        exit_unknown_error()

    if not inst:
        # if there is not another slave in zk, there is not possibility
        # it is ok
        exit_other_slave_not_running_etl()

    try:
        # NOTE(review): this checks 'instance' (this host) rather than
        # 'inst' (the other slave) - looks intentional per the original
        # code, but confirm against csv_backups_running semantics.
        running = mysql_backup_status.csv_backups_running(instance)
    except Exception:
        # Narrowed from a bare except: treat any failure to determine the
        # status as "not running", but let SystemExit/KeyboardInterrupt
        # propagate.
        exit_other_slave_not_running_etl()

    if not running:
        exit_other_slave_not_running_etl()

    exit_other_slave_running_etl()
def main():
    """Parse arguments and run a MySQL master failover."""
    parser = argparse.ArgumentParser()
    parser.add_argument('instance',
                        help='The master to be demoted')
    parser.add_argument('--trust_me_its_dead',
                        help=('You say you know what you are doing. We are '
                              'going to trust you and hope for the best'),
                        default=False,
                        action='store_true')
    parser.add_argument('--ignore_dr_slave',
                        # Typo fixes: was 'alreaedy' and 'what you looking for'
                        help=('Need to promote, but already have a dead '
                              'dr_slave? This option is what you are looking '
                              'for. The dr_slave will be completely '
                              'ignored.'),
                        default=False,
                        action='store_true')
    parser.add_argument('--dry_run',
                        help=('Do not actually run a promotion, just run '
                              'safety checks, etc...'),
                        default=False,
                        action='store_true')
    parser.add_argument('--skip_lock',
                        help=('Do not take a promotion lock. Scary.'),
                        default=False,
                        action='store_true')
    parser.add_argument('--kill_old_master',
                        help=('If we can not get the master into read_only, '
                              ' send a mysqladmin kill to the old master.'),
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)

    mysql_failover(instance, args.dry_run, args.skip_lock,
                   args.ignore_dr_slave, args.trust_me_its_dead,
                   args.kill_old_master)
def main(): parser = argparse.ArgumentParser(description=DESCRIPTION) parser.add_argument('-i', '--instance', help='The instance to query. This should ' 'be the master of a replica set, but ' 'if you supply a non-master, the script ' 'will query the master anyway.') parser.add_argument('timestamp', help='The timestamp to rewind to. This must ' 'be in MySQL format: YYYY-MM-DD HH:MM:SS') args = parser.parse_args() try: instance = host_utils.HostAddr(args.instance) zk = host_utils.MysqlZookeeper() rt = zk.get_replica_type_from_instance(instance) if rt != host_utils.REPLICA_ROLE_MASTER: instance = zk.get_mysql_instance_from_replica_set( zk.get_replica_set_from_instance(instance), host_utils.REPLICA_ROLE_MASTER) log.info('Detected master of {i} as {m}'.format(i=args.instance, m=instance)) timestamp = dt.datetime.strptime(args.timestamp, MYSQL_DT_FORMAT) except Exception as e: log.error("Error in argument parsing: {}".format(e)) gtid = find_gtid_for_timestamp(instance, timestamp) if gtid: print gtid else: sys.exit(255)
def main():
    """Log orphaned / still-used-orphan / missing shard dbs."""
    # Typo fix: was 'orpahned'
    description = ("MySQL orphaned shard detector\n\n"
                   "This utility will attempt to find orphaned databases "
                   "across sharded MySQL systems")
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Check a single instance rather than all',
                        default=False)
    args = parser.parse_args()

    if args.instance:
        instance = host_utils.HostAddr(args.instance)
    else:
        instance = False

    orphaned, orphaned_but_used, missing = find_shard_mismatches(instance)

    for o in orphaned:
        log.info('Orphan dbs: {host} {dbs}'.format(
            host=o, dbs=','.join(orphaned[o])))
    for obu in orphaned_but_used:
        log.info('Orphan, but still used, dbs: {host} {dbs}'.format(
            host=obu, dbs=','.join(orphaned_but_used[obu])))
    for m in missing:
        log.info('Missing dbs:{host} {dbs}'.format(
            host=m, dbs=','.join(missing[m])))

    if not (orphaned or orphaned_but_used or missing):
        log.info('No problems found!')
def main():
    """Kill running MySQL backups on this host unless the kill switch exists."""
    local_instance = host_utils.HostAddr(host_utils.HOSTNAME)

    # Touch file acts as a kill switch for the killer itself.
    if os.path.isfile(TOUCH_STOP_KILLING):
        log.info('Found {path}. Will not kill backups.\n'
                 'Exiting now.'.format(path=TOUCH_STOP_KILLING))
        return

    kill_mysql_backup(local_instance)
    kill_xtrabackup()
def main():
    """Run an xtrabackup of the MySQL instance on the requested local port."""
    parser = argparse.ArgumentParser(description='xtrabackup wrapper')
    parser.add_argument('-p',
                        '--port',
                        help='Port to backup on localhost (default: 3306)',
                        default='3306')
    opts = parser.parse_args()

    target = host_utils.HostAddr(':'.join((socket.getfqdn(), opts.port)))
    xtrabackup_backup_instance(target)
def main():
    """Purge old backups for the MySQL instance on the requested local port."""
    parser = argparse.ArgumentParser(description='cleanup and chmod backups')
    parser.add_argument('-p',
                        '--port',
                        help='Port to backup on localhost (default: 3306)',
                        default='3306')
    opts = parser.parse_args()

    # Same "host:port" string as the original's ''.join((fqdn, ':', port)).
    target = host_utils.HostAddr(':'.join((socket.getfqdn(), opts.port)))
    purge_mysql_backups(target)
def main(): parser = argparse.ArgumentParser(description='MySQL schema verifier') parser.add_argument( 'instance_type', help='Type of MySQL instance to verify', choices=environment_specific.SHARDED_DBS_PREFIX_MAP.keys()) parser.add_argument( 'table', help='Table to check', ) parser.add_argument( 'seed_instance', help=('Which host from which to fetch a table ' ' definition. (format hostname[:port])'), ) parser.add_argument('seed_db', help=('Which db on --seed_instance from which to fetch' ' a table definition. (ex pbdata012345)')) args = parser.parse_args() zk_prefix = environment_specific.SHARDED_DBS_PREFIX_MAP[ args.instance_type]['zk_prefix'] seed_instance = host_utils.HostAddr(args.seed_instance) desired = mysql_lib.show_create_table(seed_instance, args.seed_db, args.table) tbl_hash = hashlib.md5(desired).hexdigest() print("Desired table definition:\n{desired}").format(desired=desired) incorrect = check_schema(zk_prefix, args.table, tbl_hash) if len(incorrect) == 0: print "It appears that all schema is synced" sys.exit(0) d = difflib.Differ() for problem in incorrect.iteritems(): represenative = list(problem[1])[0].split(' ') hostaddr = host_utils.HostAddr(represenative[0]) create = mysql_lib.show_create_table(hostaddr, represenative[1], args.table) diff = d.compare(desired.splitlines(), create.splitlines()) print 'The following difference has been found:' print '\n'.join(diff) print "It is present on the following db's:" print '\n'.join(list(problem[1])) sys.exit(1)
def main():
    """CLI driver for renaming, un-renaming, and dropping retired shard dbs."""
    action_desc = """Action description:

rename - after checking no recent changes and shard not in zk,
         create a db with the old name appended to 'dropme_'. Then
         copy all tables to the new db
revert_rename - Copy all tables back from a 'dropme_' to their original table
drop - This should be run a few days after a rename. Drop the empty original
       db, and drop the 'dropme_' db.
"""
    parser = argparse.ArgumentParser(
        description='MySQL shard cleanup utility',
        epilog=action_desc,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Instance to act on if other than localhost:3306',
                        default=''.join((socket.getfqdn(), ':3306')))
    parser.add_argument('-a',
                        '--action',
                        choices=('rename', 'revert_rename', 'drop',),
                        required=True)
    parser.add_argument('-d',
                        '--dbs',
                        help=("Comma seperated list of db's to act upon"),
                        required=True)
    parser.add_argument('-r',
                        '--dry_run',
                        help=("Do not change any state"),
                        default=False,
                        action='store_true')
    parser.add_argument('-v',
                        '--verbose',
                        default=False,
                        action='store_true')
    opts = parser.parse_args()

    db_set = set(opts.dbs.split(','))
    target = host_utils.HostAddr(opts.instance)

    if opts.action == 'rename':
        rename_db_to_drop(target, db_set, opts.verbose, opts.dry_run)
    elif opts.action == 'revert_rename':
        conn = mysql_lib.connect_mysql(target)
        for db in db_set:
            mysql_lib.move_db_contents(conn=conn,
                                       old_db=''.join((DB_PREPEND, db)),
                                       new_db=db,
                                       verbose=opts.verbose,
                                       dry_run=opts.dry_run)
    elif opts.action == 'drop':
        drop_db_after_rename(target, db_set, opts.verbose, opts.dry_run)
def main():
    """Parse arguments and fence (or dry-run fence) the requested server."""
    parser = argparse.ArgumentParser()
    parser.add_argument('server',
                        help='The server to be fenced')
    parser.add_argument('--dry_run',
                        help=('Do not actually fence the host, '
                              'just show the intended '
                              'configuration'),
                        default=False,
                        action='store_true')
    opts = parser.parse_args()

    target = host_utils.HostAddr(opts.server)
    add_fence_to_host(hostname=target, dry_run=opts.dry_run)
def check_replication_for_migration(source_replica_set,
                                    destination_replica_set):
    """ Confirm that replication is sane for finishing a shard migration

    Args:
    source_replica_set - Where shards are coming from
    destination_replica_set - Where shards are being sent

    Raises:
    Exception - if the source master replicates from anything, or if the
                destination master does not replicate from the source master.
    """
    zk = host_utils.MysqlZookeeper()
    source_master = zk.get_mysql_instance_from_replica_set(source_replica_set)
    destination_master = zk.get_mysql_instance_from_replica_set(
        destination_replica_set)
    source_slave = zk.get_mysql_instance_from_replica_set(
        source_replica_set, host_utils.REPLICA_ROLE_SLAVE)
    destination_slave = zk.get_mysql_instance_from_replica_set(
        destination_replica_set, host_utils.REPLICA_ROLE_SLAVE)

    # First we will confirm that the slave of the source is caught up
    # this is important for row count comparisons
    mysql_lib.assert_replication_unlagged(
        source_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the slave of the destination replica set for the same reason
    mysql_lib.assert_replication_unlagged(
        destination_slave, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # Next, the destination master is relatively caught up to the source master
    mysql_lib.assert_replication_unlagged(
        destination_master, mysql_lib.REPLICATION_TOLERANCE_NORMAL)

    # We will also verify that the source master is not replicating. A scary
    # scenario is if there is some sort of ring replication going on and db
    # drops of blackhole db's would propagate to the source db.
    try:
        source_slave_status = mysql_lib.get_slave_status(source_master)
    except mysql_lib.ReplicationError:
        source_slave_status = None
    if source_slave_status:
        raise Exception('Source master is setup for replication '
                        'this is super dangerous!')

    # We will also verify that the destination master is replicating from the
    # source master
    slave_status = mysql_lib.get_slave_status(destination_master)
    master_of_destination_master = host_utils.HostAddr(':'.join(
        (slave_status['Master_Host'], str(slave_status['Master_Port']))))
    if source_master != master_of_destination_master:
        # BUG FIX: the error previously reported expected=destination_master;
        # the expected master here is the *source* master.
        raise Exception('Master of destination {d} is {actual} rather than '
                        'expected {expected} '
                        ''.format(d=destination_master,
                                  actual=master_of_destination_master,
                                  expected=source_master))

    log.info('Replication looks ok for migration')
def collectReplicationStatus(db):
    """ Collect replication stats using mysql_lib.calc_slave_lag """
    local_instance = host_utils.HostAddr(':'.join((socket.gethostname(),
                                                   db.port)))
    lag = mysql_lib.calc_slave_lag(local_instance)

    printmetric(db, "slave.seconds_behind_master", lag['sbm'])
    printmetric(db, "slave.io_bytes_behind", lag["io_bytes"])
    printmetric(db, "slave.sql_bytes_behind", lag["sql_bytes"])

    # Report the IO/SQL thread states as 1 (running) or 0 (stopped).
    io_running = lag['ss']['Slave_IO_Running'].lower() == 'yes'
    sql_running = lag['ss']['Slave_SQL_Running'].lower() == 'yes'
    printmetric(db, "slave.thread_io_running", int(io_running))
    printmetric(db, "slave.thread_sql_running", int(sql_running))
def process_mysql_shutdown(hostname=None, dry_run=False):
    """ Check stats, and shutdown MySQL instances

    Args:
    hostname - If supplied, only act on this host (it must already be in
               the retirement queue in the SHUTDOWN_MYSQL state)
    dry_run - If True, log intentions but do not modify any state
    """
    zk = host_utils.MysqlZookeeper()
    shutdown_instances = get_retirement_queue_servers(SHUTDOWN_MYSQL)

    if hostname:
        if hostname in shutdown_instances:
            log.info('Only acting on {}'.format(hostname))
            shutdown_instances = {hostname: shutdown_instances[hostname]}
        else:
            log.info('Supplied host {} is not ready '
                     'for shutdown'.format(hostname))
            return

    for instance in shutdown_instances:
        if instance in get_protected_hosts('set'):
            # BUG FIX: the original logged and removed 'hostname' here,
            # which is None unless a single host was supplied.
            log.warning('Host {hostname} is protected from '
                        'retirement'.format(hostname=instance))
            remove_from_retirement_queue(instance)
            continue

        # BUG FIX: the original's 'continue' only restarted the inner zk
        # loop, so an in-zk host was warned about and then shut down anyway.
        # Skip the host entirely instead.
        if any(active_instance.hostname == instance
               for active_instance in zk.get_all_mysql_instances()):
            log.warning("It appears {instance} is in zk. This is "
                        "very dangerous! If you got to here, you may be "
                        "trying to turn down a replica set. Please remove "
                        "it from zk and try again"
                        "".format(instance=instance))
            continue

        if dry_run:
            log.info('In dry_run mode, not changing state')
            continue

        try:
            if check_for_user_activity(shutdown_instances[instance]):
                log.info('Activity detected on {}, removing from queue'
                         ''.format(instance))
                # BUG FIX: was remove_from_retirement_queue(hostname)
                remove_from_retirement_queue(instance)
                continue
            else:
                log.info('Shutting down mysql on {}'.format(instance))
                mysql_lib.shutdown_mysql(host_utils.HostAddr(instance))
        except MySQLdb.OperationalError as detail:
            (error_code, msg) = detail.args
            if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
                raise
            # Already unreachable; treat as shut down and record it below.
            log.warning("Can't connect to MySQL on {}".format(instance))

        log_to_retirement_queue(instance,
                                shutdown_instances[instance]['instance_id'],
                                SHUTDOWN_MYSQL)
def main(): parser = argparse.ArgumentParser(description="Print out the replica role " "for an instance") parser.add_argument('instance', nargs='?', help="Instance to inspect, default is localhost:3306", default=''.join((host_utils.HOSTNAME, ':3306'))) args = parser.parse_args() instance = host_utils.HostAddr(args.instance) zk = host_utils.MysqlZookeeper() replica_type = zk.get_replica_type_from_instance(instance) print replica_type
def check_for_user_activity(instance):
    """ Check a retirement-queue server for recent user activity

    Args:
    instance - a dict from the retirement queue; must contain a
               'hostname' key

    Returns:
    True if unexpected user activity or connections were found,
    False otherwise (including when the check itself fails).
    """
    # NOTE: removed an unused fetch of admin credentials; nothing in this
    # function used them.

    # check mysql activity
    log.info('Checking activity on {}'.format(instance['hostname']))
    activity = mysql_lib.get_user_activity(
        host_utils.HostAddr(instance['hostname']))
    unexpected = set(activity.keys()).difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected activity on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True

    log.info('Checking current connections on '
             '{instance}'.format(instance=instance['hostname']))
    # try catch here due to the query creates the temp file will break our
    # code if disk space is full
    try:
        connected_users = mysql_lib.get_connected_users(
            host_utils.HostAddr(instance['hostname']))
    except MySQLdb.InternalError as detail:
        (err_code, msg) = detail.args
        if err_code == mysql_lib.MYSQL_ERROR_CANT_CREATE_WRITE_TO_FILE:
            log.info('No space left on device')
        return False
    except Exception:
        # BUG FIX: narrowed from a bare except so SystemExit and
        # KeyboardInterrupt are not swallowed.
        log.info('Something else is not correct here')
        return False
    unexpected = connected_users.difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected connection on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True
    return False
def find_shard_mismatches(instance=False):
    """ Find shards that are missing or unexpected in modsharddb and sharddb

    Args:
    instance - If supplied, only check this instance.

    Returns:
    orphaned - A dict of unexpected and (according to table statistics)
               unused shards. Key is master instance, value is a set.
    orphaned_but_used - A dict of unexpected but used shards.
                        Data structure is the same as orphaned.
    missing - A dict of expected but missing shards.
              Data structure is the same as orphaned.
    """
    orphaned = dict()
    orphaned_but_used = dict()
    missing_shards = dict()

    zk = host_utils.MysqlZookeeper()
    host_shard_map = zk.get_host_shard_map()

    if instance:
        # Restrict the check to the single supplied instance.
        host_shard_map = {str(instance): host_shard_map[str(instance)]}

    for master in host_shard_map:
        expected_shards = host_shard_map[master]
        instance = host_utils.HostAddr(master)
        conn = mysql_lib.connect_mysql(instance)
        activity = mysql_lib.get_dbs_activity(conn)
        actual_shards = mysql_lib.get_dbs(conn)
        unexpected_shards = actual_shards.difference(expected_shards)
        missing = expected_shards.difference(actual_shards)
        if missing:
            # Reuse the already-computed difference (was computed twice).
            missing_shards[master] = missing
        for db in unexpected_shards:
            # An unexpected shard with writes is "orphaned but used";
            # without writes it is plain "orphaned".
            if activity[db]['ROWS_CHANGED'] != 0:
                if master not in orphaned_but_used:
                    orphaned_but_used[master] = set()
                orphaned_but_used[master].add(db)
            else:
                if master not in orphaned:
                    orphaned[master] = set()
                orphaned[master].add(db)

    return orphaned, orphaned_but_used, missing_shards
def main():
    """Run a backup of the local MySQL instance on the requested port."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        help='Port to backup on localhost (default: 3306)',
                        default='3306')
    parser.add_argument('-b',
                        '--backup_type',
                        help='Type of backup to run.',
                        default=backup.BACKUP_TYPE_XBSTREAM,
                        choices=backup.BACKUP_TYPES)
    opts = parser.parse_args()

    target = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, opts.port)))
    mysql_backup(target, opts.backup_type)
def main(): parser = argparse.ArgumentParser(description='MySQL backup reporting') parser.add_argument("-d", "--date", default=time.strftime('%Y-%m-%d'), help="Backup date. Ex: 2013-12-12") parser.add_argument("-f", "--show_found", default=False, action='store_true', help="Display found backups") parser.add_argument("-i", "--instance", default=host_utils.HOSTNAME, help=("Check backup status for this instance if " "not the default (localhost:3306)")) parser.add_argument("-a", "--all", action='store_true', help="Check all replica sets") args = parser.parse_args() zk = host_utils.MysqlZookeeper() if args.all: replica_sets = zk.get_all_mysql_replica_sets() else: instance = host_utils.HostAddr(args.instance) # if we aren't in ZK, we will exit with a special return code # that can be picked up by the nagios check. try: (replica_set, _) = zk.get_replica_set_from_instance(instance) replica_sets = set([replica_set]) except Exception as e: print "Nothing known about backups for {i}: {e}".format(i=instance, e=e) sys.exit(BACKUP_NOT_IN_ZK_RETURN) return_code = BACKUP_OK_RETURN for replica_set in replica_sets: found_backup = find_mysql_backup(replica_set, args.date) if found_backup is not None: if args.show_found: print "{file}".format(file=found_backup) else: print "Backup not found for replica set {rs}".format( rs=replica_set) return_code = BACKUP_MISSING_RETURN sys.exit(return_code)
def find_unused_server_name(replica_set, conn, dry_run):
    """ Find the next unused numeric server name for a replica set

    The current naming convention for db servers is:
    {replica set}-{server number}

    Legacy hosts may still use the old convention:
    {Shard Type}{Shard number}{Server letter}

    The purpose of this function is to find the next server number that is
    not in use.

    Args:
    replica_set - The replica set of the host to be replaced
    conn - A mysql connection to the reporting server
    dry_run - don't log that a hostname will be used
    """
    cmdb_servers = environment_specific.get_all_replica_set_servers(
        replica_set)
    next_host_num = 1
    for server in cmdb_servers:
        host = host_utils.HostAddr(server['config.name'])

        # We should be able to iterate over everything that came back from the
        # cmdb and find out the greatest host number in use for a replica set
        if not host.host_identifier:
            # unparsable, probably not previously under dba management
            continue

        if (len(host.host_identifier) == 1 and
                ord(host.host_identifier) in range(ord('a'), ord('z') + 1)):
            # old style single-letter hostname. BUG FIX: the range previously
            # stopped at ord('z') exclusive, so a host lettered 'z' fell
            # through to int() and raised ValueError.
            continue

        if int(host.host_identifier) >= next_host_num:
            next_host_num = int(host.host_identifier) + 1

    new_hostname = '-'.join((replica_set, str(next_host_num)))
    while True:
        if is_hostname_new(new_hostname, conn):
            if not dry_run:
                log_new_hostname(new_hostname, conn)
            return new_hostname
        log.info('Hostname {hostname} has been logged to be in use but is not '
                 'in brood or dns'.format(hostname=new_hostname))
        next_host_num = next_host_num + 1
        new_hostname = '-'.join((replica_set, str(next_host_num)))
def archive_mysql_binlogs(port, dry_run):
    """ Flush logs and upload all binary logs that don't exist to s3

    Arguments:
    port - Port of the MySQL instance on which to act
    dry_run - Display output but do not upload
    """
    # Rotate the active binlog first so yesterday's log becomes uploadable.
    binlog_rotator.rotate_binlogs_if_needed(port, dry_run)
    zk = host_utils.MysqlZookeeper()
    instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME,
                                             str(port))))
    # Only archive binlogs for instances that are part of a production
    # replica set in zk.
    if zk.get_replica_set_from_instance(instance)[0] is None:
        log.info('Instance is not in production, exiting')
        return

    lock_handle = None
    ensure_binlog_archiving_table_sanity(instance)
    try:
        log.info('Taking binlog archiver lock')
        # flock prevents two archiver runs from racing on the same host.
        lock_handle = host_utils.take_flock_lock(BINLOG_LOCK_FILE)
        log_bin_dir = host_utils.get_cnf_setting('log_bin', port)
        bin_logs = mysql_lib.get_master_logs(instance)
        logged_uploads = get_logged_binlog_uploads(instance)
        # Skip the last entry: it is the binlog currently being written.
        for binlog in bin_logs[:-1]:
            err_count = 0
            local_file = os.path.join(os.path.dirname(log_bin_dir),
                                      binlog['Log_name'])
            if already_uploaded(instance, local_file, logged_uploads):
                continue
            success = False
            while not success:
                try:
                    upload_binlog(instance, local_file, dry_run)
                    success = True
                except:
                    # Best-effort retry with linear backoff; re-raise after
                    # MAX_ERRORS consecutive failures on this file.
                    if err_count > MAX_ERRORS:
                        log.error('Error count in thread > MAX_THREAD_ERROR. '
                                  'Aborting :(')
                        raise
                    log.error('error: {e}'.format(e=traceback.format_exc()))
                    err_count = err_count + 1
                    time.sleep(err_count*2)

        log.info('Archiving complete')
    finally:
        # Always release the flock, even if an upload ultimately failed.
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)
def main():
    """Entry point for zookeeper replica-set membership modifications."""
    parser = argparse.ArgumentParser()
    parser.add_argument('action',
                        help=("What modification to make. If 'auto', the host "
                              "replacement log will be used to determine what "
                              "what role to use. Default is auto."),
                        choices=[
                            'add_slave', 'add_dr_slave', 'auto',
                            'swap_master_and_slave',
                            'swap_slave_and_dr_slave'
                        ],
                        default='auto')
    parser.add_argument('instance',
                        help='What instance to act upon')
    parser.add_argument('--dry_run',
                        help=('Do not actually modify zk, just show '
                             'what would be modify'),
                        default=False,
                        action='store_true')
    parser.add_argument('--dangerous',
                        help=('If you need to swap_master_and_slave in zk'
                              'outside of the failover script, that is '
                              'dangerous and you will need this flag.'),
                        default=False,
                        action='store_true')
    opts = parser.parse_args()
    target = host_utils.HostAddr(opts.instance)

    # Dry runs should not spam chat.
    if opts.dry_run:
        log.removeHandler(chat_handler)

    if opts.action == 'add_slave':
        add_replica_to_zk(target, host_utils.REPLICA_ROLE_SLAVE,
                          opts.dry_run)
    elif opts.action == 'add_dr_slave':
        add_replica_to_zk(target, host_utils.REPLICA_ROLE_DR_SLAVE,
                          opts.dry_run)
    elif opts.action == 'swap_master_and_slave':
        if not opts.dangerous:
            raise Exception('To swap_master_and_slave in zk outside of the '
                            'failover script is very dangerous and the '
                            '--dangerous flag was not supplied.')
        swap_master_and_slave(target, opts.dry_run)
    elif opts.action == 'swap_slave_and_dr_slave':
        swap_slave_and_dr_slave(target, opts.dry_run)
    elif opts.action == 'auto':
        auto_add_instance_to_zk(target, opts.dry_run)
    else:
        raise Exception('Invalid action: {action}'.format(action=opts.action))
def check_for_user_activity(instance):
    """ Check a retirement-queue server for signs of live usage

    Args:
    instance - a dict from the retirement queue describing the server;
               must contain 'hostname' and 'internal_ip' keys

    Returns:
    True if unexpected user activity or connections were found,
    False otherwise.

    Raises:
    Exception - if a connection to the server cannot be established.
    """
    # NOTE: removed an unused MysqlZookeeper() instantiation.
    username, password = mysql_lib.get_mysql_user_for_role('admin')

    # check mysql activity
    log.info('Checking activity on {instance}'.format(
        instance=instance['hostname']))
    # Probe connectivity with a short timeout so a dead host does not hang
    # the whole run.
    with timeout.timeout(3):
        conn = MySQLdb.connect(host=instance['internal_ip'],
                               user=username,
                               passwd=password,
                               cursorclass=MySQLdb.cursors.DictCursor)
    if not conn:
        raise Exception('Could not connect to {ip}'
                        ''.format(ip=instance['internal_ip']))
    # BUG FIX: the probe connection was previously never closed.
    conn.close()

    activity = mysql_lib.get_user_activity(
        host_utils.HostAddr(instance['hostname']))
    unexpected = set(activity.keys()).difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected activity on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True

    log.info('Checking current connections on '
             '{instance}'.format(instance=instance['hostname']))
    connected_users = mysql_lib.get_connected_users(
        host_utils.HostAddr(instance['hostname']))
    unexpected = connected_users.difference(IGNORABLE_USERS)
    if unexpected:
        log.error('Unexpected connection on {instance} by user(s):'
                  '{unexpected}'.format(instance=instance['hostname'],
                                        unexpected=','.join(unexpected)))
        return True
    return False
def get_db_type(port):
    """ Get status in replica set via service discovery

    Args:
    port: the port on localhost

    Returns:
    'master', 'slave', 'dr_slave' or 'undef'
    """
    try:
        instance = host_utils.HostAddr(':'.join((socket.gethostname(),
                                                 str(port))))
        zk = host_utils.MysqlZookeeper()
        (_, replica_type) = zk.get_replica_set_from_instance(instance)
        return replica_type
    except Exception:
        # Narrowed from a bare except: an unknown/absent instance reports
        # 'undef', but KeyboardInterrupt/SystemExit still propagate.
        return 'undef'