Exemplo n.º 1
0
def log_to_retirement_queue(hostname, instance_id, activity):
    """ Record an activity for a host in the retirement queue log

    The row is written with REPLACE rather than INSERT so that the process
    can be restarted; doing so restarts the clock ('happened') on the
    replacement.

    Args:
    hostname - The hostname of the server to be acted upon
    instance_id - The aws instance id
    activity - What is the state to log
    """
    statement = ('REPLACE INTO mysqlops.retirement_queue '
                 'SET '
                 'hostname = %(hostname)s ,'
                 'instance_id = %(instance_id)s, '
                 'activity = %(activity)s, '
                 'happened = now() ')
    bind_values = dict(hostname=hostname,
                       instance_id=instance_id,
                       activity=activity)

    conn = mysql_lib.get_mysqlops_connections()
    queue_cursor = conn.cursor()
    queue_cursor.execute(statement, bind_values)
    # Log what the cursor reports it executed, then make the write durable
    log.info(queue_cursor._executed)
    conn.commit()
Exemplo n.º 2
0
def get_protected_hosts(return_type='tuple'):
    """ Fetch the contents of the retirement protection table

    Args:
    return_type - Options are:
                  'set' - return a set of the protected hostnames
                  'tuple' - return all data regarding protected hosts

    Returns:
    For 'tuple', the rows as returned by the cursor's fetchall(), which may
    be empty, with entries similar to:
    ({'protecting_user': '******', 'reason': 'because', 'hostname': 'sharddb-14-4'},
     {'protecting_user': '******', 'reason': 'because reasons', 'hostname': 'sharddb-14-5'})
    For 'set', a set holding the 'hostname' value of every row.

    Raises:
    Exception - if return_type is neither 'tuple' nor 'set'
    """
    # Reject unknown return types before touching the database
    if return_type not in ('tuple', 'set'):
        raise Exception('Unsupported return_type '
                        '{return_type}'.format(return_type=return_type))

    conn = mysql_lib.get_mysqlops_connections()
    protection_cursor = conn.cursor()
    protection_cursor.execute("SELECT * FROM mysqlops.retirement_protection")
    rows = protection_cursor.fetchall()

    if return_type == 'set':
        return {row['hostname'] for row in rows}
    return rows
Exemplo n.º 3
0
def protect_host(hostname, reason):
    """ Mark a host so that the retirement queue will not act on it

    Args:
    hostname - The hostname to protect
    reason - An explanation for why this host should not be retired

    Raises:
    Exception - if host_utils.get_user() reports the current user as root
    """
    current_user = host_utils.get_user()
    if current_user == 'root':
        raise Exception('Can not modify retirement protection as root')

    statement = ("INSERT INTO mysqlops.retirement_protection "
                 "SET "
                 "hostname = %(hostname)s, "
                 "reason = %(reason)s, "
                 "protecting_user = %(protecting_user)s")
    bind_values = dict(hostname=hostname,
                       reason=reason,
                       protecting_user=current_user)

    conn = mysql_lib.get_mysqlops_connections()
    protect_cursor = conn.cursor()
    protect_cursor.execute(statement, bind_values)
    conn.commit()
    log.info(protect_cursor._executed)
Exemplo n.º 4
0
def log_to_retirement_queue(hostname, instance_id, activity):
    """ Record an activity for a host in the retirement queue log

    The row is written with REPLACE rather than INSERT so that the process
    can be restarted; doing so restarts the clock ('happened') on the
    replacement.

    Args:
    hostname - The hostname of the server to be acted upon
    instance_id - The aws instance id
    activity - What is the state to log
    """
    statement = ('REPLACE INTO mysqlops.retirement_queue '
                 'SET '
                 'hostname = %(hostname)s ,'
                 'instance_id = %(instance_id)s, '
                 'activity = %(activity)s, '
                 'happened = now() ')
    bind_values = dict(hostname=hostname,
                       instance_id=instance_id,
                       activity=activity)

    conn = mysql_lib.get_mysqlops_connections()
    queue_cursor = conn.cursor()
    queue_cursor.execute(statement, bind_values)
    # Log what the cursor reports it executed, then make the write durable
    log.info(queue_cursor._executed)
    conn.commit()
Exemplo n.º 5
0
def get_protected_hosts(return_type='tuple'):
    """ Fetch the contents of the retirement protection table

    Args:
    return_type - Options are:
                  'set' - return a set of the protected hostnames
                  'tuple' - return all data regarding protected hosts

    Returns:
    For 'tuple', the rows as returned by the cursor's fetchall(), which may
    be empty, with entries similar to:
    ({'protecting_user': '******', 'reason': 'because', 'hostname': 'sharddb-14-4'},
     {'protecting_user': '******', 'reason': 'because reasons', 'hostname': 'sharddb-14-5'})
    For 'set', a set holding the 'hostname' value of every row.

    Raises:
    Exception - if return_type is neither 'tuple' nor 'set'
    """
    # Reject unknown return types before touching the database
    if return_type not in ('tuple', 'set'):
        raise Exception('Unsupported return_type '
                        '{return_type}'.format(return_type=return_type))

    conn = mysql_lib.get_mysqlops_connections()
    protection_cursor = conn.cursor()
    protection_cursor.execute("SELECT * FROM mysqlops.retirement_protection")
    rows = protection_cursor.fetchall()

    if return_type == 'set':
        return {row['hostname'] for row in rows}
    return rows
Exemplo n.º 6
0
def is_host_in_retirement_queue(hostname):
    """ Check whether a host has any row in the retirement queue

    Args:
    hostname - The hostname to look up

    Returns:
    True if the cursor reports at least one matching row, False otherwise
    """
    conn = mysql_lib.get_mysqlops_connections()
    lookup = conn.cursor()
    lookup.execute("SELECT hostname "
                   "FROM mysqlops.retirement_queue "
                   "WHERE hostname = %(hostname)s",
                   dict(hostname=hostname))
    matching_rows = lookup.rowcount
    return matching_rows > 0
Exemplo n.º 7
0
def is_host_in_retirement_queue(hostname):
    """ Check whether a host has any row in the retirement queue

    Args:
    hostname - The hostname to look up

    Returns:
    True if the cursor reports at least one matching row, False otherwise
    """
    conn = mysql_lib.get_mysqlops_connections()
    lookup = conn.cursor()
    lookup.execute("SELECT hostname "
                   "FROM mysqlops.retirement_queue "
                   "WHERE hostname = %(hostname)s",
                   dict(hostname=hostname))
    matching_rows = lookup.rowcount
    return matching_rows > 0
def take_migration_lock(source_replica_set, destination_replica_set,
                        mig_dbs, non_mig_dbs):
    """ Take a migration lock so that no other migration runs concurrently

    Both replica sets are checked for an active lock first; the source is
    checked before the destination and the first lock found blocks the
    request.

    Args:
    source_replica_set - Which replica set to take the shards from
    destination_replica_set - Which replica set to put the shards on
    mig_dbs - The names of the databases which map to the shards which
              are being migrated
    non_mig_dbs - The names of the databases which are created with blackhole
                  tables for replication to function.

    Returns: a lock identifier

    Raises:
    Exception - if an active lock already involves either replica set
    """
    conn = mysql_lib.get_mysqlops_connections()
    cursor = conn.cursor()
    new_lock_id = str(uuid.uuid4())
    log.info('Migration lock identifier is {}'.format(new_lock_id))

    log.info('Checking existing locks')
    # The destination is only consulted when the source has no active lock
    blocking_lock = (check_migration_lock(source_replica_set) or
                     check_migration_lock(destination_replica_set))
    if blocking_lock:
        cleanup_hint = ('/usr/local/bin/mysql_utils/clean_up_unfinished_migration.py {}'
                        ''.format(blocking_lock['source_replica_set']))
        log.error('Lock is already held by {}'.format(blocking_lock))
        log.error('You can abort this migration by running:')
        log.error(cleanup_hint)
        raise Exception('Can not take migration lock')

    bind_values = dict(lock=new_lock_id,
                       source_replica_set=source_replica_set,
                       destination_replica_set=destination_replica_set,
                       mig_dbs=', '.join(mig_dbs),
                       non_mig_dbs=', '.join(non_mig_dbs),
                       status=STATUS_IMPORTING)

    # Todo: turn on locking checking, swich to INSERT
    insert_lock = ("INSERT INTO mysqlops.mysql_migration_locks "
                   "SET "
                   "lock_identifier = %(lock)s, "
                   "lock_active = 'active', "
                   "created_at = NOW(), "
                   "released = NULL, "
                   "source_replica_set = %(source_replica_set)s, "
                   "destination_replica_set = %(destination_replica_set)s, "
                   "mig_databases = %(mig_dbs)s, "
                   "non_mig_databases = %(non_mig_dbs)s, "
                   "status = %(status)s ")
    cursor.execute(insert_lock, bind_values)
    conn.commit()
    log.info(cursor._executed)
    return new_lock_id
Exemplo n.º 9
0
def get_retirement_queue_servers(next_state):
    """ Pull instances in the queue that are ready for their next state

    A server qualifies when it was logged in the preceding state between
    one day and three weeks ago and has no queue row yet for next_state.

    Args:
    next_state - The desired next state of a server. Options are constants
                 SHUTDOWN_MYSQL and TERMINATE_INSTANCE.

    Returns:
    A dict keyed by hostname whose values are the matching entries of
    environment_specific.get_all_server_metadata()

    Raises:
    Exception - if next_state is not a supported state, or if the CMDB
                returns fewer than MIN_CMDB_RESULTS servers
    """
    # Map the requested state onto the state a server must already be in
    if next_state == SHUTDOWN_MYSQL:
        required_state, target_state = RESET_STATS, SHUTDOWN_MYSQL
    elif next_state == TERMINATE_INSTANCE:
        required_state, target_state = SHUTDOWN_MYSQL, TERMINATE_INSTANCE
    else:
        raise Exception('Invalid state param '
                        '"{next_state}"'.format(next_state=next_state))
    state_params = {'previous_state': required_state,
                    'next_state': target_state}

    conn = mysql_lib.get_mysqlops_connections()
    queue_cursor = conn.cursor()
    query = ("SELECT t1.hostname, t1.instance_id "
             "FROM ( "
             "    SELECT hostname, instance_id "
             "    FROM mysqlops.retirement_queue "
             "    WHERE activity = %(previous_state)s "
             "    AND happened > now() - INTERVAL 3 WEEK "
             "    AND happened < now() - INTERVAL 1 DAY) t1 "
             "LEFT JOIN mysqlops.retirement_queue t2 on t1.instance_id = t2.instance_id "
             "AND t2.activity=%(next_state)s "
             "WHERE t2.hostname IS NULL;")
    queue_cursor.execute(query, state_params)
    queued = queue_cursor.fetchall()

    # Refuse to act on a suspiciously small CMDB response
    known_servers = environment_specific.get_all_server_metadata()
    if len(known_servers) < MIN_CMDB_RESULTS:
        raise Exception('CMDB returned too few results')

    ready = dict()
    for entry in queued:
        hostname = entry['hostname']
        if hostname not in known_servers:
            # The host is gone from the CMDB, so drop its queue rows
            log.error('Something killed {instance}, cleaning up '
                      'retirement queue now'.format(instance=entry))
            remove_from_retirement_queue(hostname)
            continue

        metadata = known_servers[hostname]
        if entry['instance_id'] != metadata['instance_id']:
            # Same hostname, different instance: do not act on it
            log.error('Possibly duplicate hostname for '
                      '{hostname}!'.format(hostname=hostname))
            continue

        ready[hostname] = metadata

    return ready
Exemplo n.º 10
0
def unprotect_host(hostname):
    """ Allow a host to be acted on by the retirement queue again

    Deletes the host's rows from mysqlops.retirement_protection.

    Args:
    hostname - The hostname to remove from protection
    """
    statement = ("DELETE FROM mysqlops.retirement_protection "
                 "WHERE hostname = %(hostname)s")
    bind_values = dict(hostname=hostname)

    conn = mysql_lib.get_mysqlops_connections()
    unprotect_cursor = conn.cursor()
    unprotect_cursor.execute(statement, bind_values)
    conn.commit()
    log.info(unprotect_cursor._executed)
Exemplo n.º 11
0
def unprotect_host(hostname):
    """ Allow a host to be acted on by the retirement queue again

    Deletes the host's rows from mysqlops.retirement_protection.

    Args:
    hostname - The hostname to remove from protection
    """
    statement = ("DELETE FROM mysqlops.retirement_protection "
                 "WHERE hostname = %(hostname)s")
    bind_values = dict(hostname=hostname)

    conn = mysql_lib.get_mysqlops_connections()
    unprotect_cursor = conn.cursor()
    unprotect_cursor.execute(statement, bind_values)
    conn.commit()
    log.info(unprotect_cursor._executed)
Exemplo n.º 12
0
def remove_from_retirement_queue(hostname):
    """ Delete every retirement queue row for a host

    Args:
    hostname - the hostname to remove from the queue
    """
    statement = ('DELETE FROM mysqlops.retirement_queue '
                 'WHERE hostname = %(hostname)s')
    bind_values = dict(hostname=hostname)

    conn = mysql_lib.get_mysqlops_connections()
    delete_cursor = conn.cursor()
    delete_cursor.execute(statement, bind_values)
    # Log what the cursor reports it executed before committing
    log.info(delete_cursor._executed)
    conn.commit()
Exemplo n.º 13
0
def remove_from_retirement_queue(hostname):
    """ Delete every retirement queue row for a host

    Args:
    hostname - the hostname to remove from the queue
    """
    statement = ('DELETE FROM mysqlops.retirement_queue '
                 'WHERE hostname = %(hostname)s')
    bind_values = dict(hostname=hostname)

    conn = mysql_lib.get_mysqlops_connections()
    delete_cursor = conn.cursor()
    delete_cursor.execute(statement, bind_values)
    # Log what the cursor reports it executed before committing
    log.info(delete_cursor._executed)
    conn.commit()
Exemplo n.º 14
0
def release_promotion_lock(lock_identifier):
    """ Release a promotion lock

    Clears lock_active and stamps the release time on the row matching the
    supplied lock identifier.

    Args:
    lock_identifier - The lock to release
    """
    conn = mysql_lib.get_mysqlops_connections()
    cursor = conn.cursor()

    params = {'lock_identifier': lock_identifier}
    # The two assignments must be separated by a comma. Written with AND,
    # MySQL parses the clause as the single assignment
    # lock_active = (NULL AND released = NOW()), which never sets 'released'.
    sql = ('UPDATE mysqlops.promotion_locks '
           'SET lock_active = NULL, released = NOW() '
           'WHERE lock_identifier = %(lock_identifier)s')
    cursor.execute(sql, params)
    conn.commit()
    log.info(cursor._executed)
Exemplo n.º 15
0
def release_promotion_lock(lock_identifier):
    """ Release a promotion lock

    Clears lock_active and stamps the release time on the row matching the
    supplied lock identifier.

    Args:
    lock_identifier - The lock to release
    """
    conn = mysql_lib.get_mysqlops_connections()
    cursor = conn.cursor()

    params = {'lock_identifier': lock_identifier}
    # The two assignments must be separated by a comma. Written with AND,
    # MySQL parses the clause as the single assignment
    # lock_active = (NULL AND released = NOW()), which never sets 'released'.
    sql = ('UPDATE mysqlops.promotion_locks '
           'SET lock_active = NULL, released = NOW() '
           'WHERE lock_identifier = %(lock_identifier)s')
    cursor.execute(sql, params)
    conn.commit()
    log.info(cursor._executed)
Exemplo n.º 16
0
def auto_add_instance_to_zk(port, dry_run):
    """ Try to do right thing in adding a server to zk

    The visible portion of this function only determines and logs the role
    the local instance should take; any failure is logged with its
    traceback and re-raised.

    Args:
    port - The port of replacement instance on localhost
    dry_run - If set, do not modify zk
              NOTE(review): not read by the code visible here - confirm
              against the rest of the function.
    """
    # NOTE(review): 'instance' is not used below; the construction is kept
    # in case HostAddr validates its input - confirm.
    instance = host_utils.HostAddr(':'.join([host_utils.HOSTNAME, str(port)]))
    try:
        conn = mysql_lib.get_mysqlops_connections()
        log.info('Determining replacement for port {}'.format(port))
        instance_id = host_utils.get_local_instance_id()
        role = determine_replacement_role(conn, instance_id)
        log.info('Adding server as role: {role}'.format(role=role))
    except Exception as e:
        # 'as' is valid on Python 2.6+ and Python 3, unlike the comma form
        log.exception(e)
        raise
Exemplo n.º 17
0
def auto_add_instance_to_zk(port, dry_run):
    """ Try to do right thing in adding a server to zk

    The visible portion of this function only determines and logs the role
    the local instance should take; any failure is logged with its
    traceback and re-raised.

    Args:
    port - The port of replacement instance on localhost
    dry_run - If set, do not modify zk
              NOTE(review): not read by the code visible here - confirm
              against the rest of the function.
    """
    # NOTE(review): 'instance' is not used below; the construction is kept
    # in case HostAddr validates its input - confirm.
    instance = host_utils.HostAddr(':'.join([host_utils.HOSTNAME, str(port)]))
    try:
        conn = mysql_lib.get_mysqlops_connections()
        log.info('Determining replacement for port {}'.format(port))
        instance_id = host_utils.get_local_instance_id()
        role = determine_replacement_role(conn, instance_id)
        log.info('Adding server as role: {role}'.format(role=role))
    except Exception as e:
        # 'as' is valid on Python 2.6+ and Python 3, unlike the comma form
        log.exception(e)
        raise
Exemplo n.º 18
0
def get_promotion_lock(replica_set):
    """ Acquire a promotion lock for a replica set

    Before the new lock row is inserted, release_expired_promotion_locks
    and check_promotion_lock are run against the same connection. The
    inserted row expires 12 hours after creation.

    Args:
    replica_set - The replica set to take the lock against

    Returns:
    A unique identifer for the lock
    """
    new_lock_id = str(uuid.uuid4())
    log.info('Promotion lock identifier is '
             '{lock_identifier}'.format(lock_identifier=new_lock_id))

    conn = mysql_lib.get_mysqlops_connections()

    log.info('Releasing any expired locks')
    release_expired_promotion_locks(conn)

    log.info('Checking existing locks')
    check_promotion_lock(conn, replica_set)

    log.info('Taking lock against replica set: '
             '{replica_set}'.format(replica_set=replica_set))
    bind_values = dict(lock=new_lock_id,
                       localhost=host_utils.HOSTNAME,
                       replica_set=replica_set,
                       user=host_utils.get_user())
    insert_lock = ("INSERT INTO mysqlops.promotion_locks "
                   "SET "
                   "lock_identifier = %(lock)s, "
                   "lock_active = 'active', "
                   "created_at = NOW(), "
                   "expires = NOW() + INTERVAL 12 HOUR, "
                   "released = NULL, "
                   "replica_set = %(replica_set)s, "
                   "promoting_host = %(localhost)s, "
                   "promoting_user = %(user)s ")
    lock_cursor = conn.cursor()
    lock_cursor.execute(insert_lock, bind_values)
    conn.commit()
    log.info(lock_cursor._executed)
    return new_lock_id
Exemplo n.º 19
0
def get_promotion_lock(replica_set):
    """ Acquire a promotion lock for a replica set

    Before the new lock row is inserted, release_expired_promotion_locks
    and check_promotion_lock are run against the same connection. The
    inserted row expires 12 hours after creation.

    Args:
    replica_set - The replica set to take the lock against

    Returns:
    A unique identifer for the lock
    """
    new_lock_id = str(uuid.uuid4())
    log.info('Promotion lock identifier is '
             '{lock_identifier}'.format(lock_identifier=new_lock_id))

    conn = mysql_lib.get_mysqlops_connections()

    log.info('Releasing any expired locks')
    release_expired_promotion_locks(conn)

    log.info('Checking existing locks')
    check_promotion_lock(conn, replica_set)

    log.info('Taking lock against replica set: '
             '{replica_set}'.format(replica_set=replica_set))
    bind_values = dict(lock=new_lock_id,
                       localhost=host_utils.HOSTNAME,
                       replica_set=replica_set,
                       user=host_utils.get_user())
    insert_lock = ("INSERT INTO mysqlops.promotion_locks "
                   "SET "
                   "lock_identifier = %(lock)s, "
                   "lock_active = 'active', "
                   "created_at = NOW(), "
                   "expires = NOW() + INTERVAL 12 HOUR, "
                   "released = NULL, "
                   "replica_set = %(replica_set)s, "
                   "promoting_host = %(localhost)s, "
                   "promoting_user = %(user)s ")
    lock_cursor = conn.cursor()
    lock_cursor.execute(insert_lock, bind_values)
    conn.commit()
    log.info(lock_cursor._executed)
    return new_lock_id
Exemplo n.º 20
0
def auto_add_instance_to_zk(instance, dry_run):
    """ Try to do right thing in adding a server to zk

    The visible portion of this function looks the replacement host up in
    the CMDB, determines the role it should take and logs it; any failure
    is logged with its traceback and re-raised.

    Args:
    instance - The replacement instance
    dry_run - If set, do not modify zk
              NOTE(review): not read by the code visible here - confirm
              against the rest of the function.

    Raises:
    Exception - if the CMDB has no metadata for the instance's hostname
    """
    try:
        conn = mysql_lib.get_mysqlops_connections()
        log.info('Determining replacement for '
                 '{hostname}'.format(hostname=instance.hostname))
        server_metadata = environment_specific.get_server_metadata(instance.hostname)
        if not server_metadata:
            raise Exception('CMDB lacks knowledge of replacement host')
        instance_id = server_metadata['id']
        role = determine_replacement_role(conn, instance_id)
        log.info('Adding server as role: {role}'.format(role=role))
    except Exception as e:
        # 'as' is valid on Python 2.6+ and Python 3, unlike the comma form
        log.exception(e)
        raise
def update_migration_status(lock_identifier, status):
    """ Update the migration lock table

    Sets the status column of the row matching the lock identifier.

    Args:
    lock_identifier - a lock id as returned by take_migration_lock
    status - The new status
    """
    conn = mysql_lib.get_mysqlops_connections()

    params = {'lock': lock_identifier,
              'status': status}
    sql = ("UPDATE mysqlops.mysql_migration_locks "
           "SET "
           "status = %(status)s "
           "WHERE "
           "lock_identifier = %(lock)s ")
    # A single cursor is enough; a second one used to be opened here while
    # the first was discarded unused.
    cursor = conn.cursor()
    cursor.execute(sql, params)
    conn.commit()
    log.info(cursor._executed)
Exemplo n.º 22
0
def auto_add_instance_to_zk(instance, dry_run):
    """ Try to do right thing in adding a server to zk

    The visible portion of this function looks the replacement host up in
    the CMDB, determines the role it should take and logs it; any failure
    is logged with its traceback and re-raised.

    Args:
    instance - The replacement instance
    dry_run - If set, do not modify zk
              NOTE(review): not read by the code visible here - confirm
              against the rest of the function.

    Raises:
    Exception - if the CMDB has no metadata for the instance's hostname
    """
    try:
        conn = mysql_lib.get_mysqlops_connections()
        log.info('Determining replacement for '
                 '{hostname}'.format(hostname=instance.hostname))
        server_metadata = environment_specific.get_server_metadata(
            instance.hostname)
        if not server_metadata:
            raise Exception('CMDB lacks knowledge of replacement host')
        instance_id = server_metadata['id']
        role = determine_replacement_role(conn, instance_id)
        log.info('Adding server as role: {role}'.format(role=role))
    except Exception as e:
        # 'as' is valid on Python 2.6+ and Python 3, unlike the comma form
        log.exception(e)
        raise
Exemplo n.º 23
0
def protect_host(hostname, reason):
    """ Mark a host so that the retirement queue will not act on it

    Args:
    hostname - The hostname to protect
    reason - An explanation for why this host should not be retired

    Raises:
    Exception - if host_utils.get_user() reports the current user as root
    """
    current_user = host_utils.get_user()
    if current_user == 'root':
        raise Exception('Can not modify retirement protection as root')

    statement = ("INSERT INTO mysqlops.retirement_protection "
                 "SET "
                 "hostname = %(hostname)s, "
                 "reason = %(reason)s, "
                 "protecting_user = %(protecting_user)s")
    bind_values = dict(hostname=hostname,
                       reason=reason,
                       protecting_user=current_user)

    conn = mysql_lib.get_mysqlops_connections()
    protect_cursor = conn.cursor()
    protect_cursor.execute(statement, bind_values)
    conn.commit()
    log.info(protect_cursor._executed)
def check_migration_lock(replica_set):
    """ Look for an active migration lock that involves a replica set

    A lock matches when it is marked 'active' and the replica set is either
    its source or its destination.

    Args:
    replica_set - A name of a replica set

    Returns:
    The result of the cursor's fetchone() for the lookup, i.e. the first
    matching lock row if there is one
    """
    query = ('SELECT lock_identifier, '
             '       source_replica_set, '
             '       destination_replica_set, '
             '       mig_databases, '
             '       non_mig_databases, '
             '       status '
             'FROM mysqlops.mysql_migration_locks '
             "WHERE lock_active = 'active' AND "
             "( source_replica_set = %(replica_set)s OR"
             "  destination_replica_set = %(replica_set)s )")
    bind_values = dict(replica_set=replica_set)

    conn = mysql_lib.get_mysqlops_connections()
    lock_cursor = conn.cursor()
    lock_cursor.execute(query, bind_values)
    active_lock = lock_cursor.fetchone()
    log.info(lock_cursor._executed)
    return active_lock
def xtrabackup_backup_instance(instance):
    """ Run a file based backup on a supplied local instance

    The backup is recorded in mysqlops.mysql_backups on a best-effort basis:
    a failure to write or update the log row is logged as a warning and does
    not stop the backup. The backup itself runs under a file lock, which is
    released in all cases once it has been taken.

    Args:
    instance - A hostaddr object
    """
    starttime_sql = time.strftime('%Y-%m-%d %H:%M:%S')

    log.info('Logging initial status to mysqlops')
    # row_id stays None when the initial log row could not be written;
    # lock_handle stays None when the backup lock was never acquired.
    row_id = None
    lock_handle = None
    try:
        reporting_conn = mysql_lib.get_mysqlops_connections()
        cursor = reporting_conn.cursor()

        sql = ("INSERT INTO mysqlops.mysql_backups "
               "SET "
               "hostname = %(hostname)s, "
               "port = %(port)s, "
               "started = %(started)s, "
               "backup_type = 'xbstream' ")

        metadata = {
            'hostname': instance.hostname,
            'port': instance.port,
            'started': starttime_sql
        }

        cursor.execute(sql, metadata)
        # Remember the new row so it can be completed once the backup is done
        row_id = cursor.lastrowid
        reporting_conn.commit()
    except Exception as e:
        # Deliberately best-effort: logging must not prevent the backup
        log.warning("Unable to write log entry to "
                    "mysqlopsdb001: {e}".format(e=e))
        log.warning("However, we will attempt to continue with the backup.")

    # Take a lock to prevent multiple backups from running concurrently
    try:
        log.info('Taking backup lock')
        lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)

        # skip_lock=True is passed because the backup lock is already held
        # here (presumably the purge would otherwise take it - confirm)
        log.info('Cleaning up old backups')
        purge_mysql_backups.purge_mysql_backups(instance, skip_lock=True)

        # Actually run the backup
        log.info('Running backup')
        backup_file = backup.xtrabackup_instance(instance)
        finished = time.strftime('%Y-%m-%d %H:%M:%S')

        # Upload file to s3
        log.info('Uploading file to s3')
        backup.s3_upload(backup_file)

        # Update database with additional info now that backup is done.
        if row_id is None:
            log.info("The backup is complete, but we were not able to "
                     "write to the central log DB.")
        else:
            # row_id is only set after reporting_conn and cursor were
            # created successfully, so both are safe to use here.
            log.info("Updating database log entry with final backup info")
            try:
                sql = ("UPDATE mysqlops.mysql_backups "
                       "SET "
                       "filename = %(filename)s, "
                       "finished = %(finished)s, "
                       "size = %(size)s "
                       "WHERE id = %(id)s")
                metadata = {
                    'filename': backup_file,
                    'finished': finished,
                    'size': os.stat(backup_file).st_size,
                    'id': row_id
                }

                cursor.execute(sql, metadata)
                reporting_conn.commit()
                # NOTE(review): the connection is only closed on this
                # success path; it is left open if the update fails or
                # row_id is None.
                reporting_conn.close()
            except Exception as e:
                log.warning("Unable to update mysqlopsdb with "
                            "backup status: {e}".format(e=e))

        # Running purge again, mostly for the chmod. This runs whether or
        # not the log row was updated.
        purge_mysql_backups.purge_mysql_backups(instance, skip_lock=True)
    finally:
        # Only release the lock if it was actually taken
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)
def launch_amazon_mysql_server(hostname,
                               instance_type,
                               vpc_security_group,
                               availability_zone,
                               ssh_group,
                               mysql_major_version,
                               mysql_minor_version,
                               os_flavor,
                               dry_run,
                               skip_name_check=False):
    """ Launch a mysql server in aws

    Builds the launch configuration (image, subnet, security group,
    instance profile and cloud-config user data), validates it against the
    settings in environment_specific and then starts the instance.

    Args:
    hostname - hostname of new server
    instance_type - hardware type
    vpc_security_group - VPC firewall rules.
    availability_zone - AWS availability zone
    ssh_group - What IAM/SSH zone to use
    mysql_major_version - MySQL major version. Example 5.5 or 5.6
    mysql_minor_version - Which "branch" to use. Values are 'stable', 'staging'
                          and 'latest'.
    os_flavor - Which OS to target - 'precise' or 'trusty' at the moment
    dry_run - Do not actually launch a host, just show the expected config.
    skip_name_check - Do not check if a hostname has already been used or log
                      usage. The assumption is the caller has already done this

    Returns:
    An amazon instance id, or None when dry_run is set.

    Raises:
    Exception - if the caller's security role may not launch instances, if
                ssh_group is not acceptable for the target subnet, if the
                resulting hiera config is unsupported, or if the hostname has
                already been used (unless skip_name_check is set)
    """
    # Log every argument this function was called with, by introspecting
    # the current frame
    args, _, _, values = inspect.getargvalues(inspect.currentframe())
    for param in args:
        log.info("Requested {param} = {value}".format(param=param,
                                                      value=values[param]))

    # Only certain security roles are allowed to launch instances
    if host_utils.get_security_role(
    ) not in environment_specific.ROLE_TO_LAUNCH_INSTANCE:
        raise Exception(environment_specific.ROLE_ERROR_MSG)

    # Keyword arguments that are eventually passed to run_instances()
    config = {
        'key_name':
        environment_specific.PEM_KEY,
        'placement':
        availability_zone,
        'instance_profile_name':
        environment_specific.INSTANCE_PROFILE_NAME,
        'image_id':
        environment_specific.SUPPORTED_HARDWARE[instance_type]['ami']
        [os_flavor],
        'instance_type':
        instance_type
    }

    (subnet_name,
     config['subnet_id']) = get_subnet_from_sg(vpc_security_group,
                                               availability_zone)

    # The subnet determines the default ssh policy and instance profile;
    # this replaces the 'instance_profile_name' set in the literal above.
    ssh_security = environment_specific.SSH_SECURITY_MAP[subnet_name]['ssh']
    config['instance_profile_name'] = environment_specific.SSH_SECURITY_MAP[
        subnet_name]['iam']
    config['security_group_ids'] = [
        environment_specific.VPC_SECURITY_GROUPS[vpc_security_group]
    ]

    # An explicit ssh_group overrides the subnet default, but only when it
    # compares >= the default and has an IAM mapping.
    # NOTE(review): presumably the ordering of these values encodes policy
    # strictness - confirm what type they are and how they compare.
    if ssh_group:
        if ssh_group >= ssh_security and ssh_group in environment_specific.SSH_IAM_MAPPING.keys(
        ):
            ssh_security = ssh_group
            config[
                'instance_profile_name'] = environment_specific.SSH_IAM_MAPPING[
                    ssh_group]
        else:
            raise Exception(
                "We are not allowed to provision a host in {0} env "
                "with a weaker access policy than {1} it's existing or default "
                "config".format(ssh_group, ssh_security))

    # The major version is embedded without its dot (e.g. '5.6' -> '56')
    hiera_config = environment_specific.HIERA_FORMAT.format(
        ssh_security=ssh_security,
        mysql_major_version=mysql_major_version.replace('.', ''),
        mysql_minor_version=mysql_minor_version)

    # NOTE(review): the two sentences of this message are concatenated
    # without a separating space.
    if hiera_config not in environment_specific.SUPPORTED_HIERA_CONFIGS:
        raise Exception(
            'Hiera config {hiera_config} is not supported.'
            'Supported configs are: {supported}'
            ''.format(hiera_config=hiera_config,
                      supported=environment_specific.SUPPORTED_HIERA_CONFIGS))

    # cloud-config document handed to the new instance as user data
    config['user_data'] = ('#cloud-config\n'
                           'pinfo_team: {pinfo_team}\n'
                           'pinfo_env: {pinfo_env}\n'
                           'pinfo_role: {hiera_config}\n'
                           'hostname: {hostname}\n'
                           'raid: true\n'
                           'raid_fs: xfs\n'
                           'raid_mount: {raid_mount}'
                           ''.format(
                               pinfo_team=environment_specific.PINFO_TEAM,
                               pinfo_env=environment_specific.PINFO_ENV,
                               raid_mount=environment_specific.RAID_MOUNT,
                               hiera_config=hiera_config,
                               hostname=hostname))

    log.info('Config for new server:\n{config}'.format(config=config))
    # 'conn' is the mysqlops connection here; it is rebound to the EC2
    # connection further down. The hostname check is skipped entirely when
    # skip_name_check is set.
    conn = mysql_lib.get_mysqlops_connections()
    if not skip_name_check and not launch_replacement_db_host.is_hostname_new(
            hostname, conn):
        raise Exception('Hostname {hostname} has already been used!'
                        ''.format(hostname=hostname))
    if dry_run:
        # Nothing is launched; the bare return yields None
        log.info('In dry run mode, returning now')
        return
    else:
        conn = boto.ec2.connect_to_region(environment_specific.EC2_REGION)
        instance_id = conn.run_instances(**config).instances[0].id
        log.info('Launched instance {id}'.format(id=instance_id))
        return instance_id
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=None,
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides (None means no overrides). Available
                keys are 'mysql_minor_version', 'hostname',
                'vpc_security_group', 'availability_zone', 'instance_type',
                and 'mysql_major_version'. Overrides with a falsy value are
                ignored.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and reason is not supplied an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.

    Raises:
    Exception - if the instance is not in zk or is a master in zk, if a
                replacement younger than SERVER_BUILD_TIMEOUT days exists
                and replace_again is not set, if the server is missing from
                the cmdb, if an override key is unknown, or if no reason for
                the replacement could be determined (outside of dry run).
    MySQLdb.OperationalError - if connecting to the original server fails
                               for any cause other than
                               MYSQL_ERROR_CONN_HOST_ERROR.
    """
    # None replaces the former shared mutable default (overrides=dict())
    if overrides is None:
        overrides = {}

    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set is {replica_set}'.format(
                 host=original_server.hostname,
                 replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must not be reported as "not in zk"
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we refuse to run. No ifs, ands, or buts
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        log.info('A replacement has already been requested: '
                 '{re}'.format(re=existing_replacement))
        if replace_again:
            log.info('Argument replace_again is set, continuing on.')
        else:
            age_of_replacement = (datetime.datetime.now() -
                                  existing_replacement['created_at'])
            if age_of_replacement.days < SERVER_BUILD_TIMEOUT:
                raise Exception('Argument replace_again is not True but a '
                                'replacement already exists.')

            # The earlier build is older than the timeout, so treat this
            # launch as a deliberate re-replacement.
            log.info("A replacement already exists, but was launched "
                     "{days} days ago. The timeout for servers builds is "
                     "{timeout} days so we are automatically setting "
                     "replace_again.".format(days=age_of_replacement.days,
                                             timeout=SERVER_BUILD_TIMEOUT))
            replace_again = True

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
        version_server = original_server
    except MySQLdb.OperationalError as detail:
        # Only the error code matters; tolerate args of unexpected length
        # so the original error is re-raised rather than an unpack error.
        error_code = detail.args[0] if detail.args else None
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        dead_server = True
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')
        # The original host can not be queried, so the MySQL version is
        # read from the master of the same replica set instead.
        version_server = zk.get_mysql_instance_from_replica_set(
            original_server.get_zk_replica_set()[0],
            repl_type=host_utils.REPLICA_ROLE_MASTER)

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(
        original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    if 'aws_status.codes' in cmdb_data:
        reasons.add(cmdb_data['aws_status.codes'])

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {
        'availability_zone': cmdb_data['location'],
        'vpc_security_group': cmdb_data['security_groups'],
        'hostname': find_unused_server_name(
            original_server.get_standardized_replica_set(),
            reporting_conn, dry_run),
        'instance_type': cmdb_data['config.instance_type'],
        # First three characters of the version string, e.g. '5.6'
        'mysql_major_version':
            mysql_lib.get_global_variables(version_server)['version'][0:3],
        'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
        'dry_run': dry_run,
        'skip_name_check': True,
    }

    # At this point, all our defaults should be good to go
    config_overridden = False

    # All other overrides
    for key, new_value in overrides.items():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if not new_value:
            continue

        old_value = replacement_config[key]
        if old_value == new_value:
            log.info('Override for key {key} does not modify '
                     'configuration'.format(key=key))
            continue

        log.info('Overriding {key} to value {new} from {old}'
                 ''.format(key=key, old=old_value, new=new_value))
        reasons.add('changing {key} from {old} to '
                    '{new}'.format(key=key, old=old_value, new=new_value))
        replacement_config[key] = new_value
        config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    if not dead_server:
        try:
            mysql_lib.assert_replication_sanity(original_server)
        except Exception as e:
            # Broken replication is a valid reason to replace, not a failure
            log.info('Replication problem: {e}'.format(e=e))
            reasons.add('replication broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied. You can specify a reason '
                         'with the --reason argument'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(
        **replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=None,
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides (None means no overrides). Available
                keys are 'mysql_minor_version', 'hostname',
                'vpc_security_group', 'availability_zone',
                'classic_security_group', 'instance_type', and
                'mysql_major_version'. Overrides with a falsy value are
                ignored.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and reason is not supplied an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.

    Raises:
    Exception - if the instance is not in zk or is a master in zk, if a
                replacement younger than SERVER_BUILD_TIMEOUT days exists
                and replace_again is not set, if the server is missing from
                the cmdb, if an override key is unknown, or if no reason for
                the replacement could be determined (outside of dry run).
    MySQLdb.OperationalError - if connecting to the original server fails
                               for any cause other than
                               MYSQL_ERROR_CONN_HOST_ERROR.
    """
    # None replaces the former shared mutable default (overrides=dict())
    if overrides is None:
        overrides = {}

    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set is {replica_set}'.format(
                 host=original_server.hostname,
                 replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must not be reported as "not in zk"
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we refuse to run. No ifs, ands, or buts
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        log.info('A replacement has already been requested: '
                 '{re}'.format(re=existing_replacement))
        if replace_again:
            log.info('Argument replace_again is set, continuing on.')
        else:
            age_of_replacement = (datetime.datetime.now() -
                                  existing_replacement['created_at'])
            if age_of_replacement.days < SERVER_BUILD_TIMEOUT:
                raise Exception('Argument replace_again is not True but a '
                                'replacement already exists.')

            # The earlier build is older than the timeout, so treat this
            # launch as a deliberate re-replacement. Both numbers are in
            # days (the timeout is compared against timedelta.days above).
            log.info("A replacement already exists, but was launched "
                     "{days} days ago. The timeout for servers builds is "
                     "{timeout} days so we are automatically setting "
                     "replace_again.".format(days=age_of_replacement.days,
                                             timeout=SERVER_BUILD_TIMEOUT))
            replace_again = True

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(
        original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    if 'aws_status.codes' in cmdb_data:
        reasons.add(cmdb_data['aws_status.codes'])

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {
        'availability_zone': cmdb_data['location'],
        'hostname': find_unused_server_name(
            original_server.get_standardized_replica_set(),
            reporting_conn, dry_run),
        'instance_type': cmdb_data['config.instance_type'],
        'mysql_major_version': get_master_mysql_major_version(original_server),
        'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
        'dry_run': dry_run,
        'skip_name_check': True,
    }

    # NOTE: pop() also removes the vpc id from cmdb_data, which is later
    # handed to log_replacement_host.
    if cmdb_data.pop('cloud.aws.vpc_id', None):
        # Existing server is in VPC
        replacement_config['classic_security_group'] = None
        replacement_config['vpc_security_group'] = cmdb_data['security_groups']
    else:
        # Existing server is in Classic
        replacement_config['classic_security_group'] = \
            cmdb_data['security_groups']
        replacement_config['vpc_security_group'] = None

    # At this point, all our defaults should be good to go
    config_overridden = False
    # .get() because the override is optional; indexing raised KeyError for
    # every caller that did not pass 'vpc_security_group'.
    if (replacement_config['classic_security_group'] and
            overrides.get('vpc_security_group')):
        # a VPC migration
        vpc_migration(replacement_config, overrides)
        reasons.add('vpc migration')
        config_overridden = True

    # All other overrides
    for key, new_value in overrides.items():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if not new_value:
            continue

        # Capture the old value before it is overwritten so the recorded
        # reason really reads "from <old> to <new>".
        old_value = replacement_config[key]
        if old_value == new_value:
            log.info('Override for key {key} does not modify '
                     'configuration'.format(key=key))
            continue

        log.info('Overriding {key} to value {new} from {old}'
                 ''.format(key=key, old=old_value, new=new_value))
        reasons.add('changing {key} from {old} to '
                    '{new}'.format(key=key, old=old_value, new=new_value))
        replacement_config[key] = new_value
        config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
    except MySQLdb.OperationalError as detail:
        # Only the error code matters; tolerate args of unexpected length
        # so the original error is re-raised rather than an unpack error.
        error_code = detail.args[0] if detail.args else None
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        dead_server = True
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')

    if not dead_server:
        # A stopped replication thread is itself a reason to replace
        slave_status = mysql_lib.calc_slave_lag(original_server)
        if slave_status['ss']['Slave_SQL_Running'] != 'Yes':
            reasons.add('sql replication thread broken')

        if slave_status['ss']['Slave_IO_Running'] != 'Yes':
            reasons.add('io replication thread broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied. You can specify a reason '
                         'with the --reason argument'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(
        **replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
Exemplo n.º 29
0
def launch_amazon_mysql_server(hostname,
                               instance_type,
                               vpc_security_group,
                               classic_security_group,
                               availability_zone,
                               mysql_major_version,
                               mysql_minor_version,
                               dry_run,
                               skip_name_check=False):
    """ Launch a mysql server in aws

    Args:
    hostname - hostname of new server
    instance_type - hardware type
    vpc_security_group - VPC firewall rules. This or classic_security_group
                         must be supplied, but not both.
    classic_security_group - AWS classic firewall rules. See vpc_security_group
    availability_zone - AWS availability zone
    mysql_major_version - MySQL major version. Example 5.5 or 5.6
    mysql_minor_version - Which "branch" to use. Values are 'stable', 'staging'
                          and 'latest'.
    dry_run - Do not actually launch a host, just show the expected config.
    skip_name_check - Do not check if a hostname has already been used or log
                      usage. The assumption is the caller has already done this

    Returns:
    An amazon instance id, or None in dry run mode.

    Raises:
    Exception - if not exactly one of vpc_security_group and
                classic_security_group is supplied, if the resulting hiera
                config is not in SUPPORTED_HIERA_CONFIGS, or if the hostname
                has already been used (unless skip_name_check is set).
    """
    # Must run before any other local is bound: the frame's locals are used
    # to log exactly the arguments that were passed in.
    args, _, _, values = inspect.getargvalues(inspect.currentframe())
    for param in args:
        log.info("Requested {param} = {value}".format(param=param,
                                                      value=values[param]))

    config = {
        'key_name': environment_specific.PEM_KEY,
        'placement': availability_zone,
        # Default profile; replaced below for VPC launches
        'instance_profile_name': environment_specific.INSTANCE_PROFILE_NAME,
        'image_id':
            environment_specific.SUPPORTED_HARDWARE[instance_type]['ami'],
        'instance_type': instance_type,
    }

    if vpc_security_group and not classic_security_group:
        (subnet_name, config['subnet_id']) = get_subnet_from_sg(
            vpc_security_group, availability_zone)
        subnet_security = environment_specific.SSH_SECURITY_MAP[subnet_name]
        ssh_security = subnet_security['ssh']
        config['instance_profile_name'] = subnet_security['iam']
        config['security_group_ids'] = [
            environment_specific.VPC_SECURITY_GROUPS[vpc_security_group]
        ]
    elif classic_security_group and not vpc_security_group:
        config['security_groups'] = [classic_security_group]
        if classic_security_group in environment_specific.CLASSIC_SECURE_SG:
            ssh_security = environment_specific.SSH_SECURITY_SECURE
        else:
            ssh_security = environment_specific.SSH_SECURITY_DEV
    else:
        raise Exception('One and only one of vpc_security_group and '
                        'classic_security_group must be specified. Received:\n'
                        'vpc_security_group: {vpc}, \n'
                        'classic_security_group: {classic_security_group}'
                        ''.format(
                            vpc=vpc_security_group,
                            classic_security_group=classic_security_group))

    hiera_config = environment_specific.HIERA_FORMAT.format(
        ssh_security=ssh_security,
        mysql_major_version=mysql_major_version.replace('.', ''),
        mysql_minor_version=mysql_minor_version)
    if hiera_config not in environment_specific.SUPPORTED_HIERA_CONFIGS:
        raise Exception(
            'Hiera config {hiera_config} is not supported. '
            'Supported configs are: {supported}'
            ''.format(hiera_config=hiera_config,
                      supported=environment_specific.SUPPORTED_HIERA_CONFIGS))

    config['user_data'] = ('#cloud-config\n'
                           'pinfo_team: {pinfo_team}\n'
                           'pinfo_env: {pinfo_env}\n'
                           'pinfo_role: {hiera_config}\n'
                           'hostname: {hostname}\n'
                           'raid: true\n'
                           'raid_fs: xfs\n'
                           'raid_mount: {raid_mount}'
                           ''.format(
                               pinfo_team=environment_specific.PINFO_TEAM,
                               pinfo_env=environment_specific.PINFO_ENV,
                               raid_mount=environment_specific.RAID_MOUNT,
                               hiera_config=hiera_config,
                               hostname=hostname))

    log.info('Config for new server:\n{config}'.format(config=config))

    # The mysqlops connection is only needed for the hostname check, so do
    # not open one when the caller has already done that check.
    if not skip_name_check:
        reporting_conn = mysql_lib.get_mysqlops_connections()
        if not launch_replacement_db_host.is_hostname_new(hostname,
                                                          reporting_conn):
            raise Exception('Hostname {hostname} has already been used!'
                            ''.format(hostname=hostname))

    if dry_run:
        log.info('In dry run mode, returning now')
        return

    ec2_conn = boto.ec2.connect_to_region(environment_specific.EC2_REGION)
    instance_id = ec2_conn.run_instances(**config).instances[0].id
    log.info('Launched instance {id}'.format(id=instance_id))
    return instance_id
Exemplo n.º 30
0
def xtrabackup_backup_instance(instance):
    """ Take an xtrabackup backup of a local instance and upload it to s3

    A row describing the backup is inserted into mysqlops.mysql_backups
    before the backup starts and updated with filename, finish time and size
    once the upload is done. Failures talking to mysqlops are logged as
    warnings and do not stop the backup. The backup lock is held for the
    purge/backup/upload sequence and released on the way out, even on error.

    Args:
    instance - A hostaddr object
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    started_at = time.strftime(timestamp_format)

    log.info('Logging initial status to mysqlops')
    row_id = None
    lock_handle = None
    try:
        reporting_conn = mysql_lib.get_mysqlops_connections()
        cursor = reporting_conn.cursor()

        insert_sql = ("INSERT INTO mysqlops.mysql_backups "
                      "SET "
                      "hostname = %(hostname)s, "
                      "port = %(port)s, "
                      "started = %(started)s, "
                      "backup_type = 'xbstream' ")
        insert_params = dict(hostname=instance.hostname,
                             port=instance.port,
                             started=started_at)

        cursor.execute(insert_sql, insert_params)
        row_id = cursor.lastrowid
        reporting_conn.commit()
    except Exception as insert_error:
        log.warning("Unable to write log entry to "
                    "mysqlopsdb001: {e}".format(e=insert_error))
        log.warning("However, we will attempt to continue with the backup.")

    # Take a lock to prevent multiple backups from running concurrently
    try:
        log.info('Taking backup lock')
        lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)

        log.info('Cleaning up old backups')
        purge_mysql_backups.purge_mysql_backups(instance, skip_lock=True)

        # Actually run the backup
        log.info('Running backup')
        backup_file = backup.xtrabackup_instance(instance)
        finished_at = time.strftime(timestamp_format)

        # Upload file to s3
        log.info('Uploading file to s3')
        backup.s3_upload(backup_file)

        # Update database with additional info now that backup is done.
        # row_id is only set when the initial insert succeeded, in which
        # case cursor and reporting_conn are bound as well.
        if row_id is not None:
            log.info("Updating database log entry with final backup info")
            try:
                update_sql = ("UPDATE mysqlops.mysql_backups "
                              "SET "
                              "filename = %(filename)s, "
                              "finished = %(finished)s, "
                              "size = %(size)s "
                              "WHERE id = %(id)s")
                update_params = dict(filename=backup_file,
                                     finished=finished_at,
                                     size=os.stat(backup_file).st_size,
                                     id=row_id)

                cursor.execute(update_sql, update_params)
                reporting_conn.commit()
                reporting_conn.close()
            except Exception as update_error:
                log.warning("Unable to update mysqlopsdb with "
                            "backup status: {e}".format(e=update_error))
        else:
            log.info("The backup is complete, but we were not able to "
                     "write to the central log DB.")

        # Running purge again, mostly for the chmod (original author's note)
        purge_mysql_backups.purge_mysql_backups(instance, skip_lock=True)
    finally:
        if lock_handle:
            log.info('Releasing lock')
            host_utils.release_flock_lock(lock_handle)
Exemplo n.º 31
0
def launch_replacement_db_host(original_server,
                               dry_run=False,
                               not_a_replacement=False,
                               overrides=None,
                               reason='',
                               replace_again=False):
    """ Launch a replacement db server

    Args:
    original_server - A hostAddr object for the server to be replaced
    dry_run - If True, do not actually launch a replacement
    not_a_replacement - If set, don't log the replacement, therefore
                        automation won't put it into prod use.
    overrides - A dict of overrides (None means no overrides). Available
                keys are 'mysql_minor_version', 'hostname',
                'vpc_security_group', 'availability_zone',
                'classic_security_group', 'instance_type', and
                'mysql_major_version'. Overrides with a falsy value are
                ignored.
    reason - A description of why the host is being replaced. If the instance
             is still accessible and reason is not supplied an exception
             will be thrown.
    replace_again - If True, ignore already existing replacements.

    Raises:
    Exception - if the instance is not in zk or is a master in zk, if a
                replacement already exists and replace_again is not set, if
                the server is missing from the cmdb, if an override key is
                unknown, or if no reason for the replacement could be
                determined (outside of dry run).
    MySQLdb.OperationalError - if connecting to the original server fails
                               for any cause other than
                               MYSQL_ERROR_CONN_HOST_ERROR.
    """
    # None replaces the former shared mutable default (overrides=dict())
    if overrides is None:
        overrides = {}

    reasons = set()
    if reason:
        reasons.add(reason)

    log.info('Trying to launch a replacement for host {host} which is part '
             'of replica set is {replica_set}'.format(
                 host=original_server.hostname,
                 replica_set=original_server.get_zk_replica_set()[0]))

    zk = host_utils.MysqlZookeeper()
    try:
        (_, replica_type) = zk.get_replica_set_from_instance(original_server)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must not be reported as "not in zk"
        raise Exception('Can not replace an instance which is not in zk')
    if replica_type == host_utils.REPLICA_ROLE_MASTER:
        # If the instance is a master, we refuse to run. No ifs, ands, or buts
        raise Exception('Can not replace an instance which is a master in zk')

    # Open a connection to MySQL Ops and check if a replacement has already
    # been requested
    reporting_conn = mysql_lib.get_mysqlops_connections()
    existing_replacement = find_existing_replacements(reporting_conn,
                                                      original_server)
    if existing_replacement and not not_a_replacement:
        if not replace_again:
            raise Exception('A replacement already exists, but '
                            'replace_again is not True')
        log.info('A replacement has already been requested: '
                 '{new_host}'.format(new_host=existing_replacement))

    # Pull some information from cmdb.
    cmdb_data = environment_specific.get_server_metadata(
        original_server.hostname)
    if not cmdb_data:
        raise Exception('Could not find information about server to be '
                        'replaced in the cmdb')

    log.info('Data from cmdb: {cmdb_data}'.format(cmdb_data=cmdb_data))
    replacement_config = {
        'availability_zone': cmdb_data['location'],
        'hostname': find_unused_server_name(
            original_server.get_standardized_replica_set(),
            reporting_conn, dry_run),
        'instance_type': cmdb_data['config.instance_type'],
        'mysql_major_version': get_master_mysql_major_version(original_server),
        'mysql_minor_version': DEFAULT_MYSQL_MINOR_VERSION,
        'dry_run': dry_run,
        'skip_name_check': True,
    }

    # NOTE: pop() also removes the vpc id from cmdb_data, which is later
    # handed to log_replacement_host.
    if cmdb_data.pop('cloud.aws.vpc_id', None):
        # Existing server is in VPC
        replacement_config['classic_security_group'] = None
        replacement_config['vpc_security_group'] = cmdb_data['security_groups']
    else:
        # Existing server is in Classic
        replacement_config['classic_security_group'] = \
            cmdb_data['security_groups']
        replacement_config['vpc_security_group'] = None

    # At this point, all our defaults should be good to go
    config_overridden = False
    # .get() because the override is optional; indexing raised KeyError for
    # every caller that did not pass 'vpc_security_group'.
    if (replacement_config['classic_security_group'] and
            overrides.get('vpc_security_group')):
        # a VPC migration
        vpc_migration(replacement_config, overrides)
        reasons.add('vpc migration')
        config_overridden = True

    # All other overrides
    for key, new_value in overrides.items():
        if key not in replacement_config:
            raise Exception('Invalid override {key}'.format(key=key))

        if not new_value:
            continue

        # Capture the old value before it is overwritten so the recorded
        # reason really reads "from <old> to <new>".
        old_value = replacement_config[key]
        if old_value == new_value:
            log.info('Override for key {key} does not modify '
                     'configuration'.format(key=key))
            continue

        log.info('Overriding {key} to value {new} from {old}'
                 ''.format(key=key, old=old_value, new=new_value))
        reasons.add('changing {key} from {old} to '
                    '{new}'.format(key=key, old=old_value, new=new_value))
        replacement_config[key] = new_value
        config_overridden = True

    if config_overridden:
        log.info('Configuration after overrides: {replacement_config}'
                 ''.format(replacement_config=replacement_config))

    # Check to see if MySQL is up on the host
    try:
        # This is not multi instance compatible. If we move to multiple
        # instances this will need to be updated
        conn = mysql_lib.connect_mysql(original_server)
        conn.close()
        dead_server = False
    except MySQLdb.OperationalError as detail:
        # Only the error code matters; tolerate args of unexpected length
        # so the original error is re-raised rather than an unpack error.
        error_code = detail.args[0] if detail.args else None
        if error_code != mysql_lib.MYSQL_ERROR_CONN_HOST_ERROR:
            raise
        dead_server = True
        log.info('MySQL is down, assuming hardware failure')
        reasons.add('hardware failure')

    if not dead_server:
        # A stopped replication thread is itself a reason to replace
        slave_status = mysql_lib.calc_slave_lag(original_server)
        if slave_status['ss']['Slave_SQL_Running'] != 'Yes':
            reasons.add('sql replication thread broken')

        if slave_status['ss']['Slave_IO_Running'] != 'Yes':
            reasons.add('io replication thread broken')

    # If we get to here and there is no reason, bail out
    if not reasons and not replacement_config['dry_run']:
        raise Exception(('MySQL appears to be up and no reason for '
                         'replacement is supplied'))
    reason = ', '.join(reasons)
    log.info('Reason for launch: {reason}'.format(reason=reason))

    new_instance_id = launch_amazon_mysql_server.launch_amazon_mysql_server(
        **replacement_config)
    if not (replacement_config['dry_run'] or not_a_replacement):
        log_replacement_host(reporting_conn, cmdb_data, new_instance_id,
                             replace_again, replacement_config, reason)
def launch_amazon_mysql_server(hostname, instance_type, vpc_security_group,
                               availability_zone, mysql_major_version, mysql_minor_version,
                               dry_run, skip_name_check=False):
    """ Launch a mysql server in aws

    Args:
    hostname - hostname of new server
    instance_type - hardware type
    vpc_security_group - VPC firewall rules.
    availability_zone - AWS availability zone
    mysql_major_version - MySQL major version. Example 5.5 or 5.6
    mysql_minor_version - Which "branch" to use. Values are 'stable', 'staging'
                          and 'latest'.
    dry_run - Do not actually launch a host, just show the expected config.
    skip_name_check - Do not check if a hostname has already been used or log
                      usage. The assumption is the caller has already done this

    Returns:
    An amazon instance id, or None when dry_run is set.

    Raises:
    Exception - if the generated hiera config is not supported, if the
                hostname has already been used (and skip_name_check is not
                set), or if no EC2 connection could be created for the
                configured region.
    """
    # Log every argument this function was called with.
    args, _, _, values = inspect.getargvalues(inspect.currentframe())
    for param in args:
        log.info("Requested {param} = {value}".format(param=param,
                                                      value=values[param]))

    # 'instance_profile_name' is only a default here; it is replaced below
    # by the IAM profile mapped to the subnet.
    config = {'key_name': environment_specific.PEM_KEY,
              'placement': availability_zone,
              'instance_profile_name': environment_specific.INSTANCE_PROFILE_NAME,
              'image_id': environment_specific.SUPPORTED_HARDWARE[instance_type]['ami'],
              'instance_type': instance_type}

    (subnet_name, config['subnet_id']) = get_subnet_from_sg(vpc_security_group,
                                                            availability_zone)
    subnet_security = environment_specific.SSH_SECURITY_MAP[subnet_name]
    ssh_security = subnet_security['ssh']
    config['instance_profile_name'] = subnet_security['iam']
    config['security_group_ids'] = [environment_specific.VPC_SECURITY_GROUPS[vpc_security_group]]

    # The hiera role string drops the dot from the major version
    # (for example '5.6' becomes '56').
    hiera_config = environment_specific.HIERA_FORMAT.format(
        ssh_security=ssh_security,
        mysql_major_version=mysql_major_version.replace('.', ''),
        mysql_minor_version=mysql_minor_version)

    if hiera_config not in environment_specific.SUPPORTED_HIERA_CONFIGS:
        raise Exception('Hiera config {hiera_config} is not supported. '
                        'Supported configs are: {supported}'
                        ''.format(hiera_config=hiera_config,
                                  supported=environment_specific.SUPPORTED_HIERA_CONFIGS))

    config['user_data'] = ('#cloud-config\n'
                           'pinfo_team: {pinfo_team}\n'
                           'pinfo_env: {pinfo_env}\n'
                           'pinfo_role: {hiera_config}\n'
                           'hostname: {hostname}\n'
                           'raid: true\n'
                           'raid_fs: xfs\n'
                           'raid_mount: {raid_mount}'
                           ''.format(pinfo_team=environment_specific.PINFO_TEAM,
                                     pinfo_env=environment_specific.PINFO_ENV,
                                     raid_mount=environment_specific.RAID_MOUNT,
                                     hiera_config=hiera_config,
                                     hostname=hostname))

    log.info('Config for new server:\n{config}'.format(config=config))

    # Only talk to the reporting database when the name check is actually
    # going to run; a caller passing skip_name_check does not need it.
    if not skip_name_check:
        reporting_conn = mysql_lib.get_mysqlops_connections()
        if not launch_replacement_db_host.is_hostname_new(hostname,
                                                          reporting_conn):
            raise Exception('Hostname {hostname} has already been used!'
                            ''.format(hostname=hostname))

    if dry_run:
        log.info('In dry run mode, returning now')
        return None

    ec2_conn = boto.ec2.connect_to_region(environment_specific.EC2_REGION)
    if ec2_conn is None:
        # boto returns None rather than raising for an unknown region name.
        raise Exception('Unable to connect to EC2 region {region}'
                        ''.format(region=environment_specific.EC2_REGION))

    instance_id = ec2_conn.run_instances(**config).instances[0].id
    log.info('Launched instance {id}'.format(id=instance_id))
    return instance_id