示例#1
0
def __force_delete_instance(instance_id=None, region_id=None):
    """
    Force delete an instance, retrying when the delete request fails.

    ``instance.delete_instance`` is called with ``force=True`` up to five
    times. Each failed attempt is logged and reported to the admin, and the
    function waits five seconds before trying again. Exceptions raised by the
    delete call are caught here and are not propagated to the caller.

    :param instance_id: id of the instance to delete
    :param region_id: region id passed through to ``instance.delete_instance``
    :return: None
    """
    max_attempts = 5
    retry_interval = 5  # Seconds to wait before the next attempt

    for attempt in range(1, max_attempts + 1):
        try:
            response = instance.delete_instance(instance_id=instance_id,
                                                force=True,
                                                region_id=region_id)
        except Exception as del_e:
            err_msg = 'instance_id=%s, region_id=%s. \n\n Exception: \n %s' % (
                instance_id, region_id, del_e)
            logger.error(err_msg)
            notify.notify_admin(err_msg)
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts:
                time.sleep(retry_interval)
        else:
            logger.info(
                'Send message to delete instance (%s) success, request_id = %s' %
                (instance_id, response))
            return

    # Every attempt raised: do not report success for a request that never
    # went through.
    logger.error(
        'Send message to delete instance (%s) failed after %s attempts' %
        (instance_id, max_attempts))
示例#2
0
def __update_host_failed(host_bean, host_dao, msg=None):
    """
    Flag a host as request-failed and deleted, then persist it.

    :param host_bean: host record to update; its ``status`` and ``is_deleted``
                      attributes are overwritten and ``touch()`` is called
    :param host_dao: DAO whose ``save`` method stores the updated record
    :param msg: optional message; when truthy it is sent to the admin before
                the record is changed
    :return: None
    """
    # Alert the admin first so the notification goes out even if the
    # database write below fails.
    if msg:
        notify.notify_admin(msg)

    host_bean.status, host_bean.is_deleted = host.HOST_STATUS_REQUEST_FAILED, True
    host_bean.touch()

    # Write the failed state back to the database.
    host_dao.save(host_bean)
示例#3
0
def __create_demand_instance(image_id=None,
                             instance_type=None,
                             instance_name=None,
                             password=None,
                             security_group_id=None,
                             region_id=None,
                             vswitch_id=None,
                             user_data=None,
                             host_name=None,
                             system_disk_category=None,
                             system_disk_size=None):
    """
    Create a demand instance through ``instance.create_instance``.

    All arguments are passed through unchanged and the request is always sent
    with ``io_optimized='optimized'``. When the request raises, the failure is
    logged, reported to the admin, and the error text is handed to
    ``__retry_create_instance``; that function's result is returned instead.

    :param image_id: passed through to ``instance.create_instance``
    :param instance_type: passed through to ``instance.create_instance``
    :param instance_name: passed through to ``instance.create_instance``
    :param password: passed through to ``instance.create_instance``
                     (deliberately left out of the error message)
    :param security_group_id: passed through to ``instance.create_instance``
    :param region_id: passed through to ``instance.create_instance``
    :param vswitch_id: passed through to ``instance.create_instance``
    :param user_data: passed through to ``instance.create_instance``
    :param host_name: passed through to ``instance.create_instance``
    :param system_disk_category: passed through to ``instance.create_instance``
    :param system_disk_size: passed through to ``instance.create_instance``
    :return: the instance id returned by ``instance.create_instance`` on
             success, otherwise whatever ``__retry_create_instance`` returns.
    """
    try:
        instance_id = instance.create_instance(
            image_id=image_id,
            instance_type=instance_type,
            instance_name=instance_name,
            password=password,
            security_group_id=security_group_id,
            region_id=region_id,
            vswitch_id=vswitch_id,
            user_data=user_data,
            host_name=host_name,
            system_disk_category=system_disk_category,
            system_disk_size=system_disk_size,
            io_optimized='optimized')

    except Exception as create_instance_e:
        err_msg = 'image_id=%s, instance_type=%s, instance_name=%s, region_id=%s, vswitch_id=%s, ' \
                  'host_name=%s \n\nException: \n %s' % (image_id, instance_type, instance_name, region_id,
                                                         vswitch_id, host_name, create_instance_e)
        logger.error(err_msg)
        notify.notify_admin(err_msg)

        # ``.message`` is not guaranteed on every exception (it was deprecated
        # by PEP 352 and is absent on Python 3 built-ins). Keep using it when
        # present so the retry logic sees the same text as before, and fall
        # back to str() instead of raising AttributeError from the handler.
        reason = getattr(create_instance_e, 'message', None)
        if reason is None:
            reason = str(create_instance_e)
        instance_id = __retry_create_instance(reason)
    return instance_id
示例#4
0
def __scp_file(scp_cmd):
    """
    Run a copy command, retrying until it exits with status 0.

    The command is executed with ``subprocess.call`` up to five times, with a
    five second pause before each retry. A command that cannot be started at
    all (``OSError``) is reported to the admin, logged and retried like any
    other failed attempt.

    NOTE(review): ``scp_cmd`` is written to the debug log verbatim, and
    ``__init_instance`` passes an ``sshpass -p <password>`` command here, so
    the password ends up in the debug log.

    :param scp_cmd: argument list handed to ``subprocess.call``
    :return: exit status of the last attempt that produced one; 1 when no
             attempt ever produced an exit status
    """
    max_attempts = 5  # Retry 5 times
    retry_interval = 5  # Seconds to wait before the next attempt
    return_code = 1

    for attempt in range(1, max_attempts + 1):
        try:
            return_code = subprocess.call(scp_cmd)
            logger.debug('scp file: %s, return_code: %s' %
                         (scp_cmd, return_code))
        except (OSError, subprocess.CalledProcessError) as scp_e:
            # subprocess.call reports a non-zero exit through its return
            # value; what it raises is OSError when the program cannot be
            # launched (e.g. the executable is missing).
            notify.notify_admin(scp_e)
            logger.error(scp_e)

        if return_code == 0:
            break
        # Only wait when another attempt will actually follow.
        if attempt < max_attempts:
            time.sleep(retry_interval)

    return return_code
示例#5
0
def __init_instance(region_id, instance_id):
    """
    Init instance: copy the init files to the instance and run the worker profile.

    Steps:
      1. Look up the instance address with ``instance.get_vpc_ip_by_id``.
      2. Write a small JSON file holding the instance id and scp it to
         ``/root/sge_woker_init.conf`` on the instance. A failure of this copy
         is logged but does not abort the initialisation.
      3. scp the configured worker profile to ``/tmp`` on the instance and run
         it with ``sh`` over SSH, logging its stdout and stderr.

    The instance is force deleted when the worker profile path is not
    configured, when the script exits with a non-zero status, or when the SSH
    session raises.

    NOTE(review): in the "not configured" and "non-zero status" cases the
    instance is deleted but no exception is raised, so the caller cannot tell
    that initialisation failed -- confirm whether that is intended.

    :param region_id: region of the instance
    :param instance_id: id of the instance to initialise
    :return: None
    :raises Exception: when the worker profile cannot be copied to the
                       instance, or when the SSH connection/execution fails
    """

    logger.info('Start init instance...')
    # The value returned here is used as the scp/SSH target host below.
    host_ip = instance.get_vpc_ip_by_id(region_id=region_id,
                                        instance_id=instance_id)
    user = config.CLOUD_USER
    password = config.CLOUD_PASSWORD
    worker_profile = config.CLOUD_WORKER_PROFILE_PATH
    if not worker_profile:
        msg = 'Load workerprofile (%s) is error, please check the config file.' % worker_profile
        notify.notify_admin(msg)
        logger.error(msg)
        __force_delete_instance(instance_id, region_id)
        return
    file_name = os.path.basename(worker_profile)

    # Shared prefix of both scp commands; index 2 is the password.
    scp_prefix = [
        'sshpass', '-p', password, 'scp', '-o UserKnownHostsFile=/dev/null',
        '-o StrictHostKeyChecking=no'
    ]

    tmp_instance_path = '/tmp/%s_sge_woker_init.conf' % instance_id
    try:
        with open(tmp_instance_path, 'w') as f:
            f.write(json.dumps({'instance_id': instance_id}))

        instance_cmd = scp_prefix + [
            tmp_instance_path,
            '%s@%s:/root/sge_woker_init.conf' % (user, host_ip)
        ]
        if 0 != __scp_file(instance_cmd):
            # Best effort, as before: keep going, but leave a trace.
            logger.error('Scp %s to host failed.' % tmp_instance_path)
    finally:
        # Delete the temporary file in-process instead of spawning an
        # un-waited `rm`, and do it even when the copy above raised.
        try:
            os.remove(tmp_instance_path)
        except OSError as del_e:
            logger.error(del_e)

    scp_cmd = scp_prefix + [
        worker_profile,
        '%s@%s:/tmp/%s' % (user, host_ip, file_name)
    ]

    if 0 != __scp_file(scp_cmd):
        # Mask the password: callers log this exception and forward it to
        # the admin notification.
        safe_cmd = scp_cmd[:2] + ['******'] + scp_cmd[3:]
        raise Exception('Scp to worker error, cmd=%s' % safe_cmd)

    logger.info('Scp %s to host success.' % worker_profile)
    exe_cmd = 'sh /tmp/%s' % file_name
    ssh = paramiko.SSHClient()
    try:
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(host_ip, 22, user, password)
        _stdin, stdout, stderr = ssh.exec_command(exe_cmd)

        # Drain the output before asking for the exit status: paramiko
        # documents that recv_exit_status() can hang when the remote side
        # has produced more output than the channel window and nobody has
        # read it yet.
        for out_line in stdout.readlines():
            logger.info(out_line.strip('\n'))
        for err_line in stderr.readlines():
            logger.error(err_line.strip('\n'))

        status = stdout.channel.recv_exit_status()
        if 0 == status:
            logger.info('Init workerprofile success...')
        else:
            logger.error(
                'Init wokerprofile error, please check the workerprofile.sh script.'
            )
            __force_delete_instance(instance_id, region_id)
    except Exception as exec_e:
        logger.error(exec_e)
        __force_delete_instance(instance_id, region_id)
        raise Exception(exec_e)
    finally:
        ssh.close()
示例#6
0
def add(host_class=None, is_spot=None, host_id=None, host_name=None):
    """
    Add instance.

    Flow:
      1. Load the host record and set its status to 'requesting'.
      2. Load the worker profile for ``host_class``.
      3. Create a demand instance, first on the VSwitch of the profile's
         zone, then on other VSwitches when that one is missing or reports
         ``INSTANCE_NOT_RESOURCE``.
      4. Store the instance on the host record, wait for it to be running and,
         when the profile has no user data, initialise it over scp/SSH.

    Failures after the host record has been loaded mark the host as
    request-failed through ``__update_host_failed``; nothing is raised to the
    caller for those cases.

    :param host_class: key used to load the worker profile
    :param is_spot: truthy to request a spot instance; this is not implemented
                    yet, so the host is marked as failed
    :param host_id: id of the host record to update
    :param host_name: used as both the instance name and the host name
    :return: None
    """

    logger.debug('**** Start update host status.')
    # Bind both names up front so the except branch below can tell how far
    # the try block got instead of failing on an unbound local.
    host_dao = None
    host_bean = None
    try:
        host_dao = HostDao()
        host_bean = host_dao.load(host_id)
        if host_bean:
            host_dao.update_status(host_id, host.HOST_STATUS_REQUESTING
                                   )  # Update host status to 'requesting'
        else:
            logger.error(
                'Load host (%s) error, please check the host info in database.'
                % host_id)
            return
    except Exception as ex:
        logger.error(ex)
        if host_bean:
            __update_host_failed(host_bean, host_dao, ex)
        else:
            # The host record was never loaded, so there is nothing to mark
            # as failed; still let the admin know.
            notify.notify_admin(ex)
        return
    logger.debug('**** End update host status.')

    try:
        worker_profile_dao = WorkerProfileDao()
        worker_profile = worker_profile_dao.load(host_class)
    except Exception as ae:
        logger.error(ae)
        __update_host_failed(host_bean, host_dao, ae)
        return

    if not worker_profile:
        workerp_msg = 'Load workerprofile from database error, host_class is %s' % host_class
        logger.error(workerp_msg)
        __update_host_failed(host_bean, host_dao, workerp_msg)
        return

    logger.info('host_id=%s, host_name=%s, host_class=%s, is_spot=%s' %
                (host_id, host_name, host_class, is_spot))

    image_id = worker_profile.image_id
    instance_type = worker_profile.instance_type
    password = config.CLOUD_PASSWORD
    region_id = worker_profile.region_id
    vpc_id = worker_profile.vpc_id
    zone_id = worker_profile.zone_id
    system_disk_category = config.CLOUD_SYSTEM_DISK_CATEGORY
    system_disk_size = config.CLOUD_SYSTEM_DISK_SIZE

    security_group_id = worker_profile_dao.SEPARATOR.join(
        worker_profile.security_group_ids)
    user_data = None
    if worker_profile.user_data:
        user_data = worker_profile.user_data.encode('base64')

    if is_spot:  # Spot instance, TODO next
        # Not implemented: fail the host explicitly instead of running into
        # an unbound instance_id further down and leaving it 'requesting'.
        spot_msg = 'Spot instance is not supported yet, host_id=%s' % host_id
        logger.error(spot_msg)
        __update_host_failed(host_bean, host_dao, spot_msg)
        return

    # Keyword arguments shared by every create call below.
    create_kwargs = dict(image_id=image_id,
                         instance_type=instance_type,
                         instance_name=host_name,
                         password=password,
                         security_group_id=security_group_id,
                         region_id=region_id,
                         user_data=user_data,
                         host_name=host_name,
                         system_disk_category=system_disk_category,
                         system_disk_size=system_disk_size)

    # Demand instance
    vs_default_id = __get_vswitch_by_zone_id(
        vpc_id=vpc_id, zone_id=zone_id,
        region_id=region_id)  # Default VSwitch
    if vs_default_id:  # Default VSwitch
        instance_id = __create_demand_instance(vswitch_id=vs_default_id,
                                               **create_kwargs)

        if instance_id == INSTANCE_NOT_RESOURCE:  # Resource limit, retry other VSwitch
            logger.info(
                'Default VSwitch (%s) has not resource types (%s), change other VSwitch.'
                % (zone_id, instance_type))
            instance_id = __create_other_vswitch_instance(
                vpc_id=vpc_id, default_zone_id=zone_id, **create_kwargs)
    else:  # Other VSwitch
        logger.info('Default VSwitch (%s) is None, change other VSwitch.' %
                    zone_id)
        instance_id = __create_other_vswitch_instance(
            vpc_id=vpc_id, default_zone_id=zone_id, **create_kwargs)

    if instance_id and instance_id != INSTANCE_NOT_RESOURCE:
        logger.debug('Create instance success, instance_id=%s' % instance_id)
        try:
            host_bean.instance_id = instance_id
            host_bean.status = host.HOST_STATUS_BOOTING
            host_bean.region_id = region_id
            host_dao.save(host_bean)  # Save host to database

            running_flag = instance.check_host_status_to_running(
                region_id=region_id, instance_id=instance_id)

            if running_flag:  # Create instance success.
                host_bean.status = host.HOST_STATUS_SETTING_UP
                host_dao.save(host_bean)  # Save host to database
                # Instances created with user data are not initialised over
                # scp/SSH here.
                if not user_data:
                    __init_instance(region_id=region_id,
                                    instance_id=instance_id)  # Init instance
            else:
                __update_host_failed(host_bean, host_dao, None)

        except Exception as ex:
            logger.error(ex)
            __force_delete_instance(instance_id, region_id)
            __update_host_failed(host_bean, host_dao, ex)
    else:
        __update_host_failed(host_bean, host_dao, None)
        err_msg = 'All zones has not instance type (%s), vpc=%s, please change other instance type.' \
                  % (instance_type, vpc_id)
        notify.notify_admin(err_msg)