def restart_maxwell_if_not_exists(instance):
    """ Start Maxwell if it isn't currently running.
    Args:
        instance: (host_utils.HostAddr) host to check
    Returns:
        None
    """
    zk = host_utils.MysqlZookeeper()
    replica_type = zk.get_replica_type_from_instance(instance)
    gvars = mysql_lib.get_global_variables(instance)

    client_id = gvars['server_uuid']
    gtid_mode = gvars.get('gtid_mode') == 'ON'
    (username, _) = mysql_lib.get_mysql_user_for_role('maxwell')

    output_target = 'file'

    # master writes to kafka, everything else writes to /dev/null,
    # at least for now.
    if instance.hostname_prefix in environment_specific.MAXWELL_TARGET_MAP \
            and replica_type == host_utils.REPLICA_ROLE_MASTER:
        output_target = 'kafka'

    # we need to rewrite the config each time, because something may
    # have changed - i.e., a failover.  this is just a stopgap solution
    # pending resolution of LP-809
    mysql_cnf_builder.create_maxwell_config(client_id, instance, None,
                                            output_target, gtid_mode)

    # Check for the Maxwell PID file and then see if it belongs to Maxwell.
    maxwell_running = False
    try:
        with open(environment_specific.MAXWELL_PID, "r") as f:
            pid = f.read()

        proc = psutil.Process(int(pid))
        cmdline = proc.cmdline()

        if 'java' in cmdline and 'com.zendesk.maxwell.Maxwell' in cmdline:
            maxwell_running = True

    except (IOError, psutil.NoSuchProcess, psutil.ZombieProcess):
        # No PID file or no process matching said PID, so Maxwell is definitely
        # not running. If Maxwell is a zombie, it's not running either.
        pass

    if maxwell_running:
        log.debug('Maxwell is already running')
        return

    if instance.hostname_prefix in environment_specific.MAXWELL_TARGET_MAP:
        host_utils.manage_maxwell(instance.port)
        log.info('Started Maxwell process')
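# Usage sketch: one plausible way to drive the check above from a periodic
# job. The 'testdb-1:3306' address is a made-up placeholder and the HostAddr
# constructor signature is assumed from the docstring, not confirmed here.
def _example_ensure_maxwell_running():
    instance = host_utils.HostAddr('testdb-1:3306')  # hypothetical host:port
    restart_maxwell_if_not_exists(instance)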
def find_gtid_for_timestamp(instance, timestamp):
    """ Find the GTID for the supplied timestamp on the specified
        instance. 

    Args:
        instance: a HostAddr object
        timestamp: the timestamp to search for
    Returns:
        If the instance doesn't support GTID, return None.
        If no GTID was found in the binlogs for the supplied
        timestamp, return a blank string.
        Otherwise, return a GTID.
    """
    gvars = mysql_lib.get_global_variables(instance)

    # we are not generating GTIDs / no GTID support
    if gvars['gtid_mode'] == 'OFF' or gvars['gtid_deployment_step'] == 'ON':
        log.warning('This replica set does not currently support GTID')
        return None

    # go in reverse order, because odds are that the log we want
    # is closer to the end than the beginning.
    master_logs = list(reversed(mysql_lib.get_master_logs(instance)))

    (username, password) = mysql_lib.get_mysql_user_for_role('replication')
    for binlog in master_logs:
        # if the timestamp we want is prior to the first entry in the
        # binlog, it can't possibly be in there.
        log_start = get_binlog_start(binlog['Log_name'], instance, username,
                                     password)
        if timestamp < log_start:
            log.debug('Skipping binlog {bl} because desired {ts} < '
                      '{ls}'.format(bl=binlog['Log_name'],
                                    ts=timestamp,
                                    ls=log_start))
            continue

        # The binlog that we end up checking, if we check one at all,
        # is the first one that could possibly contain our GTID, so
        # if it isn't in this one, we're not going to find anything.
        log.debug('Checking for matching GTID in {}'.format(
            binlog['Log_name']))
        gtid = check_one_binlog(timestamp, binlog['Log_name'], instance,
                                username, password)
        if gtid:
            return gtid
        else:
            break

    log.warning("No matching GTID was found for that timestamp.")
    return ''
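# Usage sketch: calling the lookup above and handling its three possible
# return shapes (None, '', or a GTID string). The timestamp format shown is
# an assumption; it only needs to be comparable with whatever
# get_binlog_start() returns, and that helper is not shown in this snippet.
def _example_gtid_lookup(instance):
    gtid = find_gtid_for_timestamp(instance, '2016-01-01 00:00:00')
    if gtid is None:
        log.error('GTID is not supported on this replica set')
    elif gtid == '':
        log.error('No GTID found in the binlogs for that timestamp')
    else:
        log.info('Found GTID {gtid}'.format(gtid=gtid))
    return gtid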
def disk_space_available_for_migration(instance):
    """ Check the disk space available for migrations on the data dir mount

    Args:
    instance - A hostaddr object

    Returns: The number of MB available
    """
    datadir = mysql_lib.get_global_variables(instance)['datadir']
    cmd = MIGRATION_SPACE_CMD.format(hostname=instance.hostname,
                                     datadir=datadir,
                                     disk_limit=DISK_LIMIT)
    log.info(cmd)
    out, err, ret = host_utils.shell_exec(cmd)
    return float(out.strip())
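# The MIGRATION_SPACE_CMD template and DISK_LIMIT constant used above are not
# defined in this snippet. A hedged sketch of what the template could look
# like: report the megabytes available on the datadir mount over ssh, minus a
# safety margin. This is an illustration only, not the real constant.
DISK_LIMIT_EXAMPLE = 10240  # hypothetical safety margin in MB
MIGRATION_SPACE_CMD_EXAMPLE = (
    "ssh {hostname} 'echo $(( $(df -BM --output=avail {datadir} "
    "| tail -1 | tr -dc 0-9) - {disk_limit} ))'")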
def get_master_mysql_major_version(instance):
    """ Given an instance, determine the mysql major version for the master
        of the replica set.

    Args:
    instance - a hostaddr object

    Returns - A string similar to '5.5' or '5.6'
   """
    zk = host_utils.MysqlZookeeper()
    master = zk.get_mysql_instance_from_replica_set(instance.get_zk_replica_set()[0],
                                                    repl_type=host_utils.REPLICA_ROLE_MASTER)
    master_conn = mysql_lib.connect_mysql(master)
    mysql_version = mysql_lib.get_global_variables(master_conn)['version'][:3]
    return mysql_version
def get_master_mysql_major_version(instance):
    """ Given an instance, determine the mysql major version for the master
        of the replica set.

    Args:
    instance - a hostaddr object

    Returns - A string similar to '5.5' or '5.6'
   """
    zk = host_utils.MysqlZookeeper()
    master = zk.get_mysql_instance_from_replica_set(instance.get_zk_replica_set()[0],
                                                    repl_type=host_utils.REPLICA_ROLE_MASTER)
    try:
        mysql_version = mysql_lib.get_global_variables(master)['version'][:3]
    except _mysql_exceptions.OperationalError:
        raise Exception('Could not connect to master server {instance} in '
                        'order to determine MySQL version to launch with. '
                        'Perhaps run this script from there? This is likely '
                        'due to firewall rules.'
                        ''.format(instance=instance.hostname))
    return mysql_version
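# Usage sketch: the major version string returned above ('5.5', '5.6', ...)
# is typically used to pick which MySQL build to launch. The basedir mapping
# below is a made-up illustration, not a real configuration value.
def _example_pick_mysqld_basedir(instance):
    version_to_basedir = {'5.5': '/usr/local/mysql-5.5',   # hypothetical
                          '5.6': '/usr/local/mysql-5.6'}   # install paths
    major_version = get_master_mysql_major_version(instance)
    return version_to_basedir[major_version]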
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=dict(),
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides. Available keys are
                'mysql_minor_version', 'hostname', 'vpc_security_group',
                'availability_zone', 'instance_type', and 'mysql_major_version'.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and a reason is not supplied, an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.
    """
    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set {replica_set}'.format(
                 host=original_server.hostname,
                 replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except Exception:
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we will refuse to run. No ifs, ands, or buts.
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        log.info('A replacement has already been requested: '
                 '{re}'.format(re=existing_replacement))
        if replace_again:
            log.info('Argument replace_again is set, continuing on.')
        else:
            age_of_replacement = (datetime.datetime.now() -
                                  existing_replacement['created_at'])
            if age_of_replacement.days < SERVER_BUILD_TIMEOUT:
                raise Exception('Argument replace_again is not True but a '
                                'replacement already exists.')
            else:
                log.info("A replacement already exists, but was launched "
                         "{days} days ago. The timeout for servers builds is "
                         "{timeout} days so we are automatically setting "
                         "replace_again.".format(days=age_of_replacement.days,
                                                 timeout=SERVER_BUILD_TIMEOUT))
                replace_again = True

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
        version_server = original_server
    except MySQLdb.OperationalError as detail:
        dead_server = True
        (error_code, msg) = detail.args
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')
        version_server = zk.get_mysql_instance_from_replica_set(
            original_server.get_zk_replica_set()[0],
            repl_type=host_utils.REPLICA_ROLE_MASTER)

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(
        original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    if 'aws_status.codes' in cmdb_data:
        reasons.add(cmdb_data['aws_status.codes'])

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {
        'availability_zone': cmdb_data['location'],
        'vpc_security_group': cmdb_data['security_groups'],
        'hostname': find_unused_server_name(
            original_server.get_standardized_replica_set(),
            reporting_conn, dry_run),
        'instance_type': cmdb_data['config.instance_type'],
        'mysql_major_version': mysql_lib.get_global_variables(
            version_server)['version'][0:3],
        'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
        'dry_run': dry_run,
        'skip_name_check': True
    }

    # At this point, all our defaults should be good to go
    config_overridden = False

    # All other overrides
    for key in overrides.keys():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if overrides[key]:
            if replacement_config[key] == overrides[key]:
                log.info('Override for key {key} does not modify '
                         'configuration'.format(key=key))
            else:
                log.info('Overriding {key} to value {new} from {old}'
                         ''.format(key=key,
                                   old=replacement_config[key],
                                   new=overrides[key]))
                reasons.add('changing {key} from {old} to '
                            '{new}'.format(key=key,
                                           old=replacement_config[key],
                                           new=overrides[key]))
                replacement_config[key] = overrides[key]
                config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    if not dead_server:
        try:
            mysql_lib.assert_replication_sanity(original_server)
        except Exception as e:
            log.info('Replication problem: {e}'.format(e=e))
            reasons.add('replication broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied. You can specify a reason '
                         'with the --reason argument'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(
        **replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=dict(),
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides. Available keys are
                'mysql_minor_version', 'hostname', 'vpc_security_group',
                'availability_zone', 'instance_type', and 'mysql_major_version'.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and a reason is not supplied, an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.
    """
    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set {replica_set}'.format(host=original_server.hostname,
                                                   replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except Exception:
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we will refuse to run. No ifs, ands, or buts.
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        log.info('A replacement has already been requested: '
                 '{re}'.format(re=existing_replacement))
        if replace_again:
            log.info('Argument replace_again is set, continuing on.')
        else:
            age_of_replacement = datetime.datetime.now() - existing_replacement['created_at']
            if age_of_replacement.days < SERVER_BUILD_TIMEOUT:
                raise Exception('Argument replace_again is not True but a '
                                'replacement already exists.')
            else:
                log.info("A replacement already exists, but was launched "
                         "{days} days ago. The timeout for servers builds is "
                         "{timeout} days so we are automatically setting "
                         "replace_again.".format(days=age_of_replacement.days,
                                                 timeout=SERVER_BUILD_TIMEOUT))
                replace_again = True

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
        version_server = original_server
    except pymysql.OperationalError as detail:
        dead_server = True
        (error_code, msg) = detail.args
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')
        version_server = zk.get_mysql_instance_from_replica_set(original_server.get_zk_replica_set()[0],
                                                                repl_type=host_utils.REPLICA_ROLE_MASTER)

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    if 'aws_status.codes' in cmdb_data:
        reasons.add(cmdb_data['aws_status.codes'])

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {'availability_zone': cmdb_data['location'],
                          'vpc_security_group': cmdb_data['security_groups'],
                          'hostname': find_unused_server_name(original_server.get_standardized_replica_set(),
                                                              reporting_conn, dry_run),
                          'instance_type': cmdb_data['config.instance_type'],
                          'mysql_major_version': mysql_lib.get_global_variables(version_server)['version'][0:3],
                          'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
                          'dry_run': dry_run,
                          'skip_name_check': True}

    # At this point, all our defaults should be good to go
    config_overridden = False

    # All other overrides
    for key in overrides.keys():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if overrides[key]:
            if replacement_config[key] == overrides[key]:
                log.info('Override for key {key} does not modify '
                         'configuration'.format(key=key))
            else:
                log.info('Overriding {key} to value {new} from {old}'
                         ''.format(key=key,
                                   old=replacement_config[key],
                                   new=overrides[key]))
                reasons.add('changing {key} from {old} to '
                            '{new}'.format(key=key,
                                           old=replacement_config[key],
                                           new=overrides[key]))
                replacement_config[key] = overrides[key]
                config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    if not dead_server:
        try:
            mysql_lib.assert_replication_sanity(original_server)
        except Exception as e:
            log.info('Replication problem: {e}'.format(e=e))
            reasons.add('replication broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied. You can specify a reason '
                         'with the --reason argument'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(**replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
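# CLI sketch: the error message in launch_replacement_db_host() refers to a
# --reason argument, so the function is presumably driven by a command-line
# wrapper roughly like the one below. Flag names other than --reason, and the
# HostAddr('host:port') constructor, are assumptions made for illustration.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Launch a replacement '
                                                 'MySQL host')
    parser.add_argument('instance',
                        help='host:port of the server to be replaced')
    parser.add_argument('--reason', default='')
    parser.add_argument('--dry_run', action='store_true')
    parser.add_argument('--not_a_replacement', action='store_true')
    parser.add_argument('--replace_again', action='store_true')
    args = parser.parse_args()
    launch_replacement_db_host(host_utils.HostAddr(args.instance),
                               dry_run=args.dry_run,
                               not_a_replacement=args.not_a_replacement,
                               reason=args.reason,
                               replace_again=args.replace_again)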