Пример #1
0
def check_pool_state(pool_name, **kwargs):
    """
    Retry is_pool_servers_in_state() for the pool until it succeeds.

    The check is wrapped with logger_utils.aretry. If the retried call
    (or setting it up) ends in an exception, all pool stats are logged at
    debug level and the failure is reported through logger_utils.error().

    :param pool_name: name of the pool to inspect
    :param kwargs: optional settings:
        down_servers, disabled_servers, error_string - forwarded unchanged
            to is_pool_servers_in_state()
        retry_timeout - passed to aretry as ``period`` (int, default 10)
        retry_interval - passed to aretry as ``delay`` (float, default 1)
    :return: the value returned by the retried check; None if the failure
        path is taken and logger_utils.error() returns
    """
    expected_down = kwargs.get('down_servers')
    expected_disabled = kwargs.get('disabled_servers')
    expected_error = kwargs.get('error_string')
    timeout = int(kwargs.get('retry_timeout', 10))
    interval = float(kwargs.get('retry_interval', 1))

    try:
        @logger_utils.aretry(delay=interval, period=timeout)
        def retry_action():
            return is_pool_servers_in_state(
                pool_name, expected_down, expected_disabled, expected_error)

        return retry_action()
    except Exception as exc:
        # Dump the pool stats first so the failure can be diagnosed.
        all_stats = get_all_pool_stats(pool_name)
        logger.debug("Failure case : All pool stats %s" % all_stats)
        failure_msg = ('Did not find pool in expected state after retry '
                       'timeout of %s, down servers: %s, failed with '
                       'error: %s' % (timeout, expected_down, exc))
        logger_utils.error(failure_msg)
Пример #2
0
def udp_server(servers, port, **kwargs):
    """
    Start the UDP listener script on each server and verify it came up.

    For every server handle, /root/common/scripts/udp_server.py is launched
    in the background with its output redirected to
    /tmp/udp_server_out_<server ip>. After a 10 second wait the process list
    and that output file are inspected.

    Args:
        :param servers: a single server handle or a list of handles; a falsy
            value selects every handle from get_all_server_handle()
        :type servers: List
        :param port: listener port
        :type port: int/str
        :param kwargs: accepted but not used here

    Reports through fail() when no udp_server.py process is listed, and
    through error() when the output file does not contain 'starting'.
    """
    if not servers:
        servers = get_all_server_handle()
    elif isinstance(servers, basestring):
        # A single handle was passed; normalise to a list.
        servers = [servers]

    launch_template = ('python /root/common/scripts/udp_server.py '
                       '--ip %s --p %s &> /tmp/udp_server_out_%s &')
    for handle in servers:
        srv = get_server_by_handle(handle)
        vm = srv.vm()
        ip = srv.ip()

        cmd = launch_template % (ip, port, ip)
        logger.info("udp_server command: %s" % cmd)
        vm.execute_command(cmd, log_error=False)
        sleep(10)

        # Is the listener process alive?
        out = vm.execute_command(
            "ps aux | grep 'udp_server.py' | grep -v grep ")
        if not out:
            fail("UDP Server Process not started .. %s " % out)

        # Did the script announce that it is starting?
        out = vm.execute_command('cat /tmp/udp_server_out_%s' % ip)
        if 'starting' not in ''.join(out):
            error("UDP Server not started .. %s " % out)
Пример #3
0
def check_cloud_state(expected_status='CLOUD_STATE_PLACEMENT_READY', **kwargs):
    """
    Check the state of the clouds listed by the 'cloud-inventory' API.

    Waits 10 seconds, fetches the inventory and inspects every cloud (or
    only the one named by kwargs['cloud_name'], when given). error() is
    called when a cloud's status carries an 'error' entry, when its state
    differs from expected_status, or when its reason mentions
    'Pending re-config'. AWS clouds get an extra 30 second delay before the
    state mismatch is reported.

    :param expected_status: cloud state string every inspected cloud must have
    :param kwargs: optional 'cloud_name' restricting the check to one cloud
    :return: True (reached whenever error() returns control)
    """
    wanted_cloud = kwargs.get('cloud_name', None)
    asleep(msg='waiting for cloud state', delay=10)
    _status_code, inventory = get('cloud-inventory')

    for entry in inventory['results']:
        if wanted_cloud and wanted_cloud != entry['config']['name']:
            continue

        status = entry['status']
        if 'error' in status:
            error('Received Error in cloud status %s' % status['error'])

        state = status['state']
        reason = status.get('reason', '')
        # A cloud can be up yet about to be reconfigured; treat that as
        # not ready too.
        # REVIEW any other reasons that we need to account for?
        if state == expected_status and 'Pending re-config' not in reason:
            continue

        if entry['config']['vtype'] == 'CLOUD_AWS':
            asleep("additional delay for AWS cloud", delay=30)
        error(
            'Cloud Status is not as expected or reason not null.  Expected=%s Received=%s, reason = %s',
            expected_status, state, reason)
    return True
Пример #4
0
def change_role_privileges_and_expect_failure(role_name, resource_name, access,
                                              **kwargs):
    """ API to change the user role privileges and expect the change to fail.

    Calls change_role_privileges() with check_status_code=False and verifies
    that the request was rejected with HTTP 400 and a response containing
    'Cannot modify Role'.

    Args:
        :param role_name: User access role name
        :type role_name: str
        :param resource_name: Privilege/Resource name
        :type resource_name: str
        :param access: User Access Mode
        :type access: str
        :param kwargs: forwarded unchanged to change_role_privileges()
    Returns:
        None. The expected rejection is logged at info level; anything else
        is reported through error().
    """
    status_code, resp = change_role_privileges(role_name,
                                               resource_name,
                                               access,
                                               check_status_code=False,
                                               **kwargs)
    if status_code == 400 and 'Cannot modify Role' in str(resp):
        logger.info(
            "Expected Failure with status code 400 and Cannot modify Role in error message"
        )
    else:
        # Adjacent literals are used instead of a backslash continuation
        # inside the string, which embedded the source indentation in the
        # reported message.
        error('Status code Expected=400 Received=%s and error msg '
              'Expected=Cannot modify Role Received=%s'
              % (status_code, str(resp)))
Пример #5
0
def get_uuid_from_ref(url_ref=None):
    """
    Extract the trailing identifier from an object reference URL.

    The identifier is the text after the last '/' with any '#...' suffix
    removed. A falsy url_ref is reported through error() before the
    extraction is attempted.

    :param url_ref: reference URL (converted with str() before parsing)
    :return: the last path segment, up to but excluding the first '#'
    """
    if not url_ref:
        error("URL ref is None")
    last_segment = str(url_ref).rsplit('/', 1)[-1]
    return last_segment.partition('#')[0]
Пример #6
0
 def test_error(self):
     ''' Check that error() is continuable: execution proceeds after each call. '''
     logger.info('error once')
     error('error 1')
     # Reached only because error() returned control to the test.
     # NOTE(review): error_count presumably records how many errors are
     # expected so far - confirm against Test_Failures.
     Test_Failures.error_count = 1
     logger.info('error twice')
     error('error 2')
     Test_Failures.error_count = 2
     logger.error_list = []  # reset to not actually fail
Пример #7
0
def upload_file(file_path, directory, should_pass=True, expected_error=None):
    """
    Upload a file and check the outcome against the caller's expectation.

    :param file_path: local path of the file to upload
    :param directory: target passed to _upload_multipart_file_form()
    :param should_pass: True when the upload is expected to succeed
    :param expected_error: when should_pass is False, a substring the
        raised exception text must contain
    :raises Exception: the original upload exception, when a failure was
        expected but its text does not contain expected_error

    An exception raised while should_pass is True is reported through
    error(). An exception raised while should_pass is False is swallowed
    if expected_error is empty or matches.
    """
    try:
        _upload_multipart_file_form(file_path, directory)
    except Exception as e:
        if should_pass:
            error("File upload should have passed but got: " + str(e))
        elif expected_error and expected_error not in str(e):
            # Bare raise keeps the original traceback (``raise e`` resets it
            # on Python 2).
            raise
Пример #8
0
def wait_for_vcenter_state(cloud_name, exp_states):
    """
    Check the vCenter inventory state against a set of accepted states.

    Reads 'vimgrvcenterruntime' and looks at the 'inventory_state' of the
    first result. An empty result set and a non-matching state are both
    reported through error().

    :param cloud_name: not used by this function
    :param exp_states: container of acceptable inventory states
    :return: True when the state is in exp_states; None otherwise (if
        error() returns)
    """
    _status_code, runtime = get('vimgrvcenterruntime')
    logger.debug('wait_for_vcenter_state data: %s' % runtime)
    if runtime['count'] == 0:
        error('Inventory is not complete')

    current_state = runtime['results'][0]['inventory_state']
    if current_state not in exp_states:
        error('Check for state %s one more time got %s'
              % (exp_states, current_state))
        return None
    return True
Пример #9
0
def get_vm_cloud_sdkconn(vm_name):
    """
    Build the cloud SDK connection for a VM of the current site's testbed.

    The VM and cloud JSON are looked up in the testbed JSON of the site
    named by the config's 'site_name' mode, then handed to
    get_vm_cloud_sdk(). A falsy connection is reported through error().

    :param vm_name: name of the VM to look up
    :return: the object returned by get_vm_cloud_sdk()
    """
    cfg = get_config()
    site = cfg.get_mode(key='site_name')
    testbed_json = cfg.testbed[site].tb_json

    vm_json, cloud_json = get_vm_and_cloud_json(vm_name, testbed_json)
    sdk_conn = get_vm_cloud_sdk(cloud_json=cloud_json, vm_json=vm_json)
    if not sdk_conn:
        error('Expected non-None vm_cloud_sdk_conn for vm %s' % vm_name)
    return sdk_conn
Пример #10
0
def log_count_should_increase_by(vs_name, initial_log_count,
                                 expected_increase_log_count, **kwargs):
    '''
    Verify that the log count of a virtual service grew by the expected
    amount.

    The check itself is delegated to has_log_count_increased_by(), which
    receives all arguments and kwargs unchanged.
    NOTE(review): any waiting/retrying (e.g. a retry_timeout) presumably
    happens inside that helper - confirm.

    Returns the helper's result when it is truthy; otherwise reports the
    failure through logger_utils.error() and returns None.
    '''
    logger.info('initial_log_count: %s' % initial_log_count)
    logger.info('expected_increase_log_count: %s' %
                expected_increase_log_count)

    increased = has_log_count_increased_by(
        vs_name, initial_log_count, expected_increase_log_count, **kwargs)
    if increased:
        return increased

    logger_utils.error(
        'Log count did not increase by expected value vs %s' % vs_name)
Пример #11
0
 def poweroff(self, vm_name=None):
     """ Power off an openstack vm.

     :param vm_name: name of the vm; defaults to the 'name' entry of
         self.vm_json
     :return: True right away when the vm is already in 'SHUTOFF' state,
         otherwise the result of self.check_vm_status(vm_name, 'SHUTOFF')
     """
     if not vm_name:
         vm_name = self.vm_json.get('name')
     vm = None
     try:
         vm = self.nova.servers.find(name='%s' % vm_name)
         logger.info('vm state %s' %vm.status)
         if vm.status == 'SHUTOFF':
             logger.info('vm is already powered off, return')
             return True
     except Exception as e:
         # The exception must be bound: the message below formats it, and
         # without the binding the handler itself died with a NameError.
         error("can't find vm %s in openstack, exp: %s" % (vm_name, e))
     if vm:
         logger.debug('Found vm: %s to poweroff' % vm_name)
         vm.stop()
     return (self.check_vm_status(vm_name, 'SHUTOFF'))
Пример #12
0
def is_there_IO_error(client_range,
                      log_file='httptest_io_error*',
                      raise_exception=False):
    """
        Check the traffic-generation IO error log on the first client VM.

        The last five lines of log_file are read with ``tail -5`` on the VM
        of the first client in client_range.

        :param client_range: client range resolved with
            get_clients_from_range(); only the first client is used
        :param log_file: path/glob of the error log on the client VM; must
            be a string, anything else is reported through
            logger_utils.fail()
        :param raise_exception: when True and the log has content, each
            line is parsed as JSON, a summary is assembled, the log file is
            removed and the summary is reported through logger_utils.error()
        :return: the string 'False' when the log is empty, the string 'True'
            when it has content and raise_exception is False; None after
            the raise_exception path (if logger_utils.error() returns)
    """

    if isinstance(log_file, basestring):
        logs = [log_file]
    else:
        logger_utils.fail(
            'HttpTest failed. Error - Log file should be of type string, but got : %s'
            % log_file)

    for _log_file in logs:

        logger.info('is_there_IO_error: %s\n' % _log_file)

        clients = get_clients_from_range(client_range)
        vm, ip = traffic_manager.get_client_by_handle(clients[0])
        logger.debug('VM IP, NAME, CLIENT: %s, %s, %s' % (vm.ip, vm.name, ip))
        cmd = 'tail -5 %s' % _log_file
        resp = vm.execute_command(cmd)
        if len(resp) > 0 and raise_exception:
            error_msg = 'Get request failed\n'
            for line in resp:
                try:
                    msg = json.loads(line)
                except Exception:
                    # When httptest fails, it doesn't write error log in json
                    # format.
                    logger_utils.error('HttpTest failed. Error - %s' % line)
                    # There is no parsed record for this line; skip the
                    # summary entry instead of reading an unbound or stale
                    # ``msg``.
                    continue
                error_msg += 'Client: %s\nValidation: %s\nExpected: %s\nActual: ' \
                             '%s\n\n' % (msg['client'], msg['error_code'],
                                         msg['expected'], msg['actual'])
            # Cleaning up before raising exception
            vm.execute_command('rm %s &> /tmp/httptest' % _log_file)
            logger_utils.error(error_msg)
        else:
            if len(resp) == 0:
                return 'False'
            else:
                logger.info('Failures: %s' % resp)
                return 'True'
Пример #13
0
def clean_secondary_ips_on_client_server(tb_json):
    """
    Unassign secondary private IPs from client/server VMs on AWS clouds.

    For every testbed VM of type 'client' or 'server', the VM's cloud is
    looked up by name, first under 'Cloud' and then under 'VmCloud'. VMs
    whose cloud cannot be found are skipped. For clouds with vtype
    'CLOUD_AWS' an Aws SDK connection is opened, all non-primary private
    IPs of each interface of the instance are unassigned, and the
    connection is closed again.

    :param tb_json: testbed dict; reads the 'Vm', 'Cloud' and 'VmCloud'
        entries
    :return: None. Failures while talking to AWS are reported through
        error().
    """
    client_server_vms = [vm_json for vm_json in (tb_json.get('Vm') or [])
                         if vm_json.get('type') in ['client', 'server']]
    for vms in client_server_vms:
        cloud_name = vms.get('cloud_name', 'Default-Cloud')

        # Start from None for every VM so a failed lookup can neither leave
        # the name unbound nor reuse the previous VM's cloud. The
        # comprehension variables are named differently on purpose: on
        # Python 2 they leak into this scope.
        cloud_json = None
        try:
            cloud_json = [cloud for cloud in tb_json.get('Cloud')
                          if cloud.get('name') == cloud_name][0]
        except TypeError:
            logger.info('Must be no-access cloud?')
        except IndexError:
            logger.info("Can't find vm cloud under Clouds for %s" %vms['name'])

        if not cloud_json:
            try:
                # Check in vm clouds
                cloud_json = [vm_cloud for vm_cloud in tb_json.get('VmCloud')
                              if vm_cloud.get('name') == cloud_name][0]
            except TypeError:
                logger.info('no VmCloud defined in the testbed')
            except IndexError:
                logger.info("Can't find vm cloud under VmClouds for %s" % vms['name'])

        if not cloud_json:
            logger.info('cloud_json None, no access cloud?')
            continue

        sdk_conn = None
        try:
            if cloud_json.get('vtype') == 'CLOUD_AWS':
                sdk_conn = Aws(cloud_configuration_json=cloud_json.get('aws_configuration'))
                logger.info("AWS SDK connection successfull for cloud %s" %cloud_json.get('name'))
                logger.info("Finding instance for %s " %vms['name'])
                instance = sdk_conn._Aws__get_instance(vms['name'])
                for interface in instance.interfaces:
                    sec_ips = [ip.private_ip_address
                               for ip in interface.private_ip_addresses
                               if ip.primary == False]
                    logger.info('Unassigning secondary IPs : %s from interface %s' % (sec_ips, interface.id))
                    if sec_ips:
                        interface.connection.unassign_private_ip_addresses(
                            network_interface_id=interface.id, private_ip_addresses=sec_ips)
        except Exception as e:
            error("clean_secondary_ips_on_client_server failed due to %s" %e)
        finally:
            # Close each connection that was actually opened; a single
            # disconnect after the loop failed when no AWS connection
            # existed and leaked all but the last one.
            if sdk_conn is not None:
                sdk_conn.disconnect()
Пример #14
0
def _upload_multipart_file_form(file_path, file_uri):
    """
    POST a local file to the controller's fileservice as a multipart form.

    The request goes to https://<session.controller_ip>:443/api/fileservice/uploads
    with two form fields: 'file' (the file content, sent as
    application/octet-stream under its base name) and 'uri'.

    :param file_path: local path of the file to send; a missing file is
        reported through error()
    :param file_uri: value of the 'uri' form field
    :return: None; a response status code above 300 is reported through
        error()
    """
    if not os.path.exists(file_path):
        error('File not found: ' + file_path)

    session = get_session()
    # The port is fixed to 443 rather than taken from the session.
    port = 443
    upload_url = 'https://%s:%s/api/fileservice/uploads' % (
        session.controller_ip, port)
    base_name = os.path.basename(file_path)

    # The file must stay open while the encoder is posted.
    with open(file_path, "rb") as handle:
        encoder = MultipartEncoder({
            "file": (base_name, handle, "application/octet-stream"),
            "uri": file_uri
        })
        response = session.post(upload_url, data=encoder)

        if response.status_code > 300:
            error('Fail to upload: ' + response.content)
Пример #15
0
 def sg_del_retry():
     """Delete the security group ``sg_group`` via ``sdk_conn.vpc``.

     Any exception is reported through error().
     NOTE(review): the name suggests the caller retries this helper - confirm.
     """
     try:
         sdk_conn.vpc.delete_security_group(group_id=sg_group.id)
     except Exception as exc:
         failure = "Delete security group failed with exception %s " % exc
         error(failure)
Пример #16
0
 def ins_ter_retry():
     """Terminate ``instance``.

     Any exception is reported through error().
     NOTE(review): the name suggests the caller retries this helper - confirm.
     """
     try:
         instance.terminate()
     except Exception as exc:
         failure = "Delete instance failed with exception %s " % exc
         error(failure)
Пример #17
0
def get_controller_processes(controller, retry=5, role=None, **kwargs):
    """
    Return the pids of the controller processes.

    The list of process names comes from get_controller_process_names()
    (minus an ignore list, plus a few fixed names). Their status is
    queried with ``sudo status <name>`` on the controller and the trailing
    number of each matching status line is taken as the pid. For
    'postgresql' and 'postgresql_metrics' the pid is read from
    /var/run/<name>.pid instead.

    :param controller: controller VM object; its ip, processes and
        latest_core attributes and its execute_* methods are used
    :param retry: remaining number of re-reads when parsing the status
        output fails while controller.processes is non-empty
    :param role: node role; looked up with get_node_role() when None
    :param kwargs: forwarded to get_controller_process_names() and to the
        recursive re-read
    :return: dict mapping each process name to its pid (int)
    """

    if role is None:
        role = get_node_role(controller.ip)

    _ignore = ['redis-server', 'setup_system', 'nginx', 'snmpd', 'aviportal']
    if role == 'CLUSTER_LEADER':
        _add = ['zookeeper', 'log_core_manager', 'vi-mgr',
                'redis-server INSTANCE=5001']
    else:
        _add = ['zookeeper', 'log_core_manager', 'redis-server INSTANCE=5001']
    proc_names = get_controller_process_names(role, controller_vm=controller, **kwargs)
    proc_names = [proc for proc in proc_names if proc not in _ignore]
    proc_names.extend(_add)
    if role != 'CLUSTER_LEADER':
        proc_names.remove('redis-server INSTANCE=5001')

    procs = {}
    command_str = []
    for proc in proc_names:
        try:
            if ':' in proc:
                # "service:instance" names are queried per instance.
                service, instance = proc.split(':')
                command_str.append('sudo status %s INSTANCE=%s;' % (service, instance))
            else:
                command_str.append('sudo status %s;' % proc)
        except Exception as e:
            logger_utils.fail("Unexpected error:", e.message)

    timeout = 300
    elapsed_time = 0
    resp = ''
    # Query the status; while some process reports 'stop/waiting', poll
    # again every 10 seconds for up to `timeout` seconds.
    while True:
        try:
            if rest.get_cloud_type() == 'baremetal':
                resp = controller.execute_on_docker_container(''.join(command_str))
                resp = resp[controller.ip].splitlines()
            elif rest.get_cloud_type() == 'gcp':
                resp = controller.execute_on_docker_container(''.join(command_str))
                resp = resp[controller.vm_public_ip].splitlines()
            else:
                resp = controller.execute_command(''.join(command_str))
        except Exception as e:
            # Controller might have rebooted and we are reporting a reboot failure
            # so just reset all the processes and core links for VM
            # NOTE: two further ``except Exception`` handlers used to follow
            # this one; they could never run and were removed.
            logger_utils.asleep(delay=60)
            controller.processes = []
            controller.latest_core = None
            logger_utils.error(
                'Failed to connect to Controller: %s, %s' % (controller.ip, e))
        crashed_processes = [proc for proc in resp if 'stop/waiting' in proc]
        if crashed_processes and elapsed_time < timeout:
            logger_utils.asleep(delay=10)
            elapsed_time += 10
            continue
        break

    # Response array contains blank items which need to be removed.
    # "\tpost-start process <pid>" lines are dropped as well: they are not
    # status lines of their own. They are filtered into a new list because
    # removing items from the list being iterated skips adjacent matches.
    resp = [value for value in resp
            if value != '' and 'post-start process' not in value]

    current_proc = None
    try:
        for proc in proc_names:
            current_proc = proc
            proc_pid = ''
            if proc in ['postgresql', 'postgresql_metrics']:
                if rest.get_cloud_type() == 'gcp':
                    out = controller.execute_on_docker_container("cat '/var/run/%s.pid'" % proc)
                    out = out[controller.vm_public_ip].splitlines()
                else:
                    out = (controller.execute_command_fab("cat '/var/run/%s.pid'" % proc))
                pid = str(out[0]) if len(out) > 0 else None
                if not pid or 'no such file' in pid.lower():
                    pass
                else:
                    proc_pid = pid
            procname = proc.split()[0]
            procresp = ''
            if 'instance' in proc.lower():
                instance_id = proc.split('=')[1]
                logger.info('Procname: %s   InstanceId: %s' % (procname, instance_id))
                for value in resp:
                    if procname in value and '(%s)' % instance_id in value:
                        procresp = value
                        break
            else:
                for value in resp:
                    resp_procname = value.split()[0]
                    if proc in ['postgresql', 'postgresql_metrics']:
                        procresp = proc_pid
                    elif procname == resp_procname:
                        procresp = value
                        break
            logger.info('Proc: %s  RespProc: %s' % (proc, procresp))
            # The pid is the run of digits at the end of the line.
            match = re.search(r'(\d+$)', procresp)
            if match:
                procs[proc] = (int(match.group(1)))
            else:
                raise IndexError

    except Exception:
        # Reached when the status output could not be parsed (no trailing
        # pid found, or any other error above).
        # In case of crash, sleep for 5 secs to allow process to restart
        # avoiding cascading failures.
        # NOTE: a second, unreachable ``except Exception`` handler used to
        # follow this one and was removed.
        if controller.processes:
            logger_utils.asleep(delay=5)
            retry -= 1
            if retry < 1:
                # Out of retries: empty the list so the next failure takes
                # the fail() branch below.
                del controller.processes[:]
            return get_controller_processes(controller, retry=retry, role=role, **kwargs)
        else:
            # The case where controller process not running at all!
            logger_utils.fail(
                'process: %s is not running on controller %s!' % (
                    current_proc, controller.ip))

    logger.info('get_controller_processes: %s' % procs)
    return procs
Пример #18
0
 def img_del_retry():
     """Deregister ``image`` together with its snapshot.

     Any exception is reported through error().
     NOTE(review): the name suggests the caller retries this helper - confirm.
     """
     try:
         image.deregister(delete_snapshot=True)
     except Exception as exc:
         failure = "Delete image failed with exception %s " % exc
         error(failure)
Пример #19
0
def pool_wellness_check(pool_name,
                        t_state,
                        t_num,
                        t_up,
                        t_enabled,
                        skip_detail_check=0):
    """
    Compare a pool's cached summary against the expected state and counts.

    Waits 10 seconds, loads the pool data with cache_pool() and then checks,
    in order: summary vs. detail oper state (unless skipped), summary oper
    state vs. t_state, and the summary's num_servers, num_servers_enabled
    and num_servers_up vs. the expected numbers. Every mismatch is reported
    through logger_utils.fail().

    :param pool_name: name of the pool
    :param t_state: expected oper state of the pool summary
    :param t_num: expected number of servers (converted with int())
    :param t_up: expected number of servers that are up (converted with int())
    :param t_enabled: expected number of enabled servers (converted with int())
    :param skip_detail_check: truthy to skip the summary/detail state
        comparison
    :return: True when no check failed
    """
    logger_utils.asleep(msg='wait', delay=10)
    t_num = int(t_num)
    t_up = int(t_up)
    t_enabled = int(t_enabled)
    logger.info(
        '## start pool wellness check pool=%s t_state=%s t_num=%d t_up=%d t_enabled=%d'
        % (pool_name, t_state, t_num, t_up, t_enabled))

    pool, pool_summary, pool_detail = cache_pool(pool_name)

    try:
        summary_oper_state = pool_summary['oper_status']['state']
        # The detail may come back as a list; use its first entry then.
        if isinstance(pool_detail, list):
            detail_oper_state = pool_detail[0]['oper_status']['state']
            pool_det = pool_detail[0]
        else:
            detail_oper_state = pool_detail['oper_status']['state']
            pool_det = pool_detail
    except KeyError as err_msg:
        logger_utils.fail('## oper status not available: %s' % err_msg)

    if not skip_detail_check:
        if summary_oper_state != detail_oper_state:
            logger_utils.error(
                "Cache issue - summary state %s != detail state %s" %
                (summary_oper_state, detail_oper_state))
            logger_utils.fail(
                "Cache issue - summary state %s != detail state %s" %
                (summary_oper_state, detail_oper_state))

    if summary_oper_state != t_state:
        logger.debug("summary state %s != expected state %s" %
                     (summary_oper_state, t_state))
        logger_utils.fail("summary state %s != expected state %s" %
                          (summary_oper_state, t_state))

    if int(pool_summary['num_servers']) != t_num:
        logger.trace(
            "num servers mismatch (e-%d, s-%d, d-%d)" %
            (t_num, pool_summary['num_servers'], pool_det['num_servers']))
        # Reported via logger_utils.fail like every other mismatch in this
        # function (this branch used to call logger.fail).
        logger_utils.fail(
            "num servers mismatch (e-%d, s-%d, d-%d)" %
            (t_num, pool_summary['num_servers'], pool_det['num_servers']))

    if int(pool_summary['num_servers_enabled']) != t_enabled:
        logger.trace("num servers enabled mismatch (e-%d, s-%d, d-%d)" %
                     (t_enabled, pool_summary['num_servers_enabled'],
                      pool_det['num_servers_enabled']))
        logger_utils.fail("num servers enabled mismatch (e-%d, s-%d, d-%d)" %
                          (t_enabled, pool_summary['num_servers_enabled'],
                           pool_det['num_servers_enabled']))

    if int(pool_summary['num_servers_up']) != t_up:
        logger.trace(
            "num servers up mismatch (e-%d, s-%d, d-%d)" %
            (t_up, pool_summary['num_servers_up'], pool_det['num_servers_up']))
        logger_utils.fail(
            "num servers up mismatch (e-%d, s-%d, d-%d)" %
            (t_up, pool_summary['num_servers_up'], pool_det['num_servers_up']))

    return True
Пример #20
0
 def ni_del_retry():
     """Delete the network interface ``ni`` via ``sdk_conn.vpc``.

     Any exception is reported through error().
     NOTE(review): the name suggests the caller retries this helper - confirm.
     """
     try:
         sdk_conn.vpc.delete_network_interface(ni.id)
     except Exception as exc:
         failure = "Delete Network Interface failed with exception %s " % exc
         error(failure)