Пример #1
0
def start_ib_acme(nodes):
    """Launch the ib_stress.sh background workers on ``nodes``.

    Creates LOG_DIR on the nodes, then issues the same detached
    ``ib_stress.sh`` command NUM_OF_QUERIES times through
    ``ssa_tools_utils.pdsh_run``.  Standard output of the workers is
    redirected to /dev/null.

    Always returns 0.
    """
    ssa_tools_utils.pdsh_run(nodes, 'mkdir -p %s' % LOG_DIR)

    # The command line is identical for every worker, so it is built once.
    stress_cmd = 'nohup %s/ib_stress.sh %d > /dev/null &' % (SSA_SCRIPTS,
                                                             IB_ACME_DELAY)
    for _ in range(NUM_OF_QUERIES):
        ssa_tools_utils.pdsh_run(nodes, stress_cmd)

    return 0
Пример #2
0
def test_0_0_0(ibmsnet):
    """Repeatedly send SIGHUP to opensm, then check the setup for errors.

    For ``rch_global_dict['subnet_up_num']`` iterations a random core node
    is picked and ``kill -s HUP`` is sent to its opensm/valgrind processes,
    followed by a random pause between ``min_delay`` and ``max_delay``
    seconds.  Afterwards maintain.py and logcollector.py are run and their
    status/error logs (written under ``rch_global_dict['log_dir']``) are
    inspected.

    ``ibmsnet`` is not used by this function.

    Returns 0 on success, 1 when the error log reports errors or the status
    log contains 'STOP'.  The result is also passed to ``test_report``.
    """
    test_header = inspect.getframeinfo(inspect.currentframe()).function
    phase = 'Subnet UP '
    rch_global_dict['test_description'][test_header] = phase
    status = 0

    top_log_dir = rch_global_dict['log_dir']
    log_dir = '%s/%s' % (top_log_dir, test_header)
    os.mkdir(log_dir)
    print(commands.getoutput('chmod 777 %s' % log_dir))

    total = rch_global_dict['subnet_up_num']
    for i in range(total):
        print('[%s] Subnet UP #%d out of %d' % (
            time.strftime("%b %d %H:%M:%S"), i, total))
        core_node = random.choice(rch_global_dict['core_nodes'])
        ssa_tools_utils.pdsh_run(core_node,
                                 'sudo kill -s HUP `pidof opensm valgrind`')
        time.sleep(
            random.randint(rch_global_dict['min_delay'],
                           rch_global_dict['max_delay']))

    ssa_home = ssa_tools_utils.SSA_HOME
    topology = rch_global_dict['topology']
    collect_cmds = [
        '%s/maintain.py -t %s --setup status > %s/ssa_status.log' %
        (ssa_home, topology, top_log_dir),
        '%s/maintain.py -t %s -e > %s/ssa_errors.log' %
        (ssa_home, topology, top_log_dir),
        '%s/logcollector.py -t %s -o %s' %
        (ssa_home, topology, top_log_dir),
    ]
    for cmd in collect_cmds:
        print(cmd)
        # Only the files written by the commands matter; output is ignored.
        commands.getoutput(cmd)

    errors_out = commands.getoutput("cat %s/ssa_errors.log" % top_log_dir)
    status_out = commands.getoutput("cat %s/ssa_status.log" % top_log_dir)
    # Membership test so a marker at the very start of the log (offset 0)
    # is detected as well.
    if 'Found errors found on' in errors_out or 'STOP' in status_out:
        print('There are errors in  %s/ssa_errors.log' % top_log_dir)
        status = 1

    test_report(test_header, phase, status)
    return status
Пример #3
0
def start_stress(ibmsnet):
    """Start the counter collectors and the ACM stress scripts.

    On every node in ``rch_global_dict['acm_nodes']`` the scratch folder is
    created, ``server_counters.sh`` is launched in the background against
    the ibacm process, and then ``ib_stress.sh`` is launched
    ``number_of_queries`` times, each run logging to its own file in the
    scratch folder.

    ``ibmsnet`` is not used by this function.

    Reports through ``test_report`` and returns ``status``, which is always
    0 here.
    """
    test_header = inspect.getframeinfo(inspect.currentframe()).function
    phase = 'Stress Test'
    rch_global_dict['test_description'][test_header] = phase
    status = 0
    counter_delay = 30
    number_of_queries = 10

    # All ACM nodes take part in the stress run.
    acm_nodes = rch_global_dict['acm_nodes']

    print('Start counters on %s' % ','.join(acm_nodes))
    scratch_dir = rch_global_dict['scretch_folder']
    counters_log = '%s/`hostname`_counters.log' % (scratch_dir)
    ssa_tools_utils.pdsh_run(acm_nodes, 'mkdir -p %s' % scratch_dir)
    counters_cmd = (
        'nohup %s/server_counters.sh `/usr/bin/pgrep ibacm` %d > %s &'
        % (ssa_tools_utils.SSA_SCRIPTS, counter_delay, counters_log))
    ssa_tools_utils.pdsh_run(acm_nodes, counters_cmd)

    print('Start ACM stress on %s' % ','.join(acm_nodes))

    for query_idx in range(number_of_queries):
        # One log file per launched stress script.
        stress_log = '%s/`hostname`_ibstress_%d.log' % (
            rch_global_dict['scretch_folder'], query_idx)
        stress_cmd = 'nohup %s/ib_stress.sh %d > %s &' % (
            ssa_tools_utils.SSA_SCRIPTS, query_idx, stress_log)
        ssa_tools_utils.pdsh_run(acm_nodes, stress_cmd)

    test_report(test_header, phase, status)
    return status
Пример #4
0
def stop_stress(ibmsnet):
    """Stop the stress/counter scripts, collect logs and scan for errors.

    Kills ``ib_stress.sh`` and ``server_counters.sh`` on the ACM nodes,
    moves the scratch folder contents into the test folder, runs
    maintain.py and logcollector.py, and finally greps for "failed" or
    "ERR" (case-insensitive).  Any grep output marks the test as failed.

    ``ibmsnet`` is not used by this function.

    Returns 0 when grep printed nothing, 1 otherwise.  The result is also
    passed to ``test_report``.
    """
    test_header = inspect.getframeinfo(inspect.currentframe()).function
    phase = 'Stress Test'
    rch_global_dict['test_description'][test_header] = phase
    status = 0
    acm_nodes = rch_global_dict['acm_nodes']
    scratch_dir = rch_global_dict['scretch_folder']
    test_dir = rch_global_dict['test_folder']

    ssa_tools_utils.pdsh_run(
        acm_nodes, 'killall ib_stress.sh server_counters.sh 2>/dev/null')
    time.sleep(5)
    ssa_tools_utils.pdsh_run(
        acm_nodes, 'sudo mv %s/* %s/ 2>/dev/null' % (scratch_dir, test_dir))
    time.sleep(10)

    # Run for their side effects only; the captured output is not used.
    commands.getoutput('%s/maintain.py -t %s -e' %
                       (ssa_tools_utils.SSA_HOME, rch_global_dict['topology']))
    commands.getoutput('%s/logcollector.py -t %s -o %s' %
                       (ssa_tools_utils.SSA_HOME, rch_global_dict['topology'],
                        test_dir))

    # -E is required: in grep's default basic syntax "|" is a literal
    # character, so "failed|ERR" would never act as an alternation.
    # NOTE(review): the scratch folder contents were moved to the test
    # folder on the ACM nodes just above -- confirm this is the directory
    # that should be scanned.
    for cmd in ['grep -rniE "failed|ERR" %s' % scratch_dir]:
        o = commands.getoutput(cmd)
        print("%s\n%s" % (cmd, o))
        if o != '':
            status = 1

    test_report(test_header, phase, status)
    return status
Пример #5
0
def sanity_test_1(cores, als, acms, data):
    """Check that a disconnected ACM disappears from the other ACMs' caches.

    Steps:
      1. Find the core node whose LID equals the master SM LID reported by
         the local ``ibstat``.
      2. From every ACM except ``acms[0]``, query ``acms[0]`` by LID and by
         GID; any failing query aborts the test with that status.
      3. Stop services, disable the fabric port ``acms[0]`` is attached to
         and wait 120 seconds.
      4. Repeat the queries; this time a *successful* query is an error
         (the port is re-enabled and 1 is returned).
      5. Re-enable the port, wait 60 seconds and start the services again.

    Parameters:
      cores -- core node names
      als   -- access layer node names; ``als[0]`` is the one passed to
               stop_services/start_services
      acms  -- ACM node names; ``acms[0]`` is the node being disconnected
      data  -- per-node mapping indexed by ``LID`` and ``GID``

    Returns a falsy value (0/False) on success and a truthy value on
    failure.
    """
    status = 0

    osmlid = commands.getoutput(
        "/usr/sbin/ibstat |grep -a5 Act|grep SM|awk '{print $NF}'").rstrip(
            '\n')

    for core in cores:
        if data[core][LID] == osmlid:
            core_master = core
            break
    else:
        # Without a master core none of the fabric commands below can be
        # issued; fail cleanly instead of hitting an unbound name later.
        print('ERROR. No core node matches master SM LID %s' % osmlid)
        return 1

    access_svc = als[0]
    acm_svc = acms[0]

    print('===================================================================')
    print('========================= SANITY TEST 1 ===========================')
    print('===================================================================')

    for acm in acms:
        if acm == acm_svc:
            continue

        status = test_acm_by_lid_query(acm, data[acm][LID], data[acm_svc][LID])
        if status != 0:
            return status

        status = test_acm_by_gid_query(acm, data[acm][GID], data[acm_svc][GID])
        if status != 0:
            return status

    stop_services(core_master, access_svc, acm_svc)

    # Disconnect ACM from fabric
    (remote_lid, remote_port) = get_node_remote(acm_svc)
    cmd = 'ibportstate %s %s disable' % (remote_lid, remote_port)
    print(cmd)
    ssa_tools_utils.pdsh_run(core_master, cmd)
    print('Wait')
    time.sleep(120)

    enable_cmd = 'ibportstate %s %s enable' % (remote_lid, remote_port)

    def enable_port():
        # Command can be run on any node except for the disconnected ACM.
        print(enable_cmd)
        ssa_tools_utils.pdsh_run(core_master, enable_cmd)

    for acm in acms:
        if acm == acm_svc:
            continue

        # The queries are expected to FAIL now: status == 0 means the
        # disconnected ACM is still being resolved.
        status = test_acm_by_lid_query(acm,
                                       data[acm][LID],
                                       data[acm_svc][LID],
                                       initial_query=1,
                                       print_err=0)
        if status == 0:
            print('ERROR. ACM %s LID %s still exists in %s LID %s cache' %
                  (acm_svc, str(data[acm_svc][LID]), acm, str(data[acm][LID])))
            enable_port()
            return 1

        status = test_acm_by_gid_query(acm,
                                       data[acm][GID],
                                       data[acm_svc][GID],
                                       print_err=0)
        if status == 0:
            print('ERROR. ACM %s GID %s still exists in %s GID %s cache' %
                  (acm_svc, str(data[acm_svc][GID]), acm, str(data[acm][GID])))
            enable_port()
            return 1

    # Reconnect ACM back to fabric
    enable_port()
    time.sleep(60)

    start_services(core_master, access_svc, acm_svc)
    # After the negative checks above status is non-zero ("query failed as
    # expected"), so inverting it produces the falsy success value.
    # NOTE(review): when acms holds only the disconnected node both loops
    # are skipped, status is still 0 and this returns True -- confirm that
    # reporting a failure in that case is intended.
    status = not status

    print('===================================================================')
    print('==================== SANITY TEST 1 COMPLETE =======================')
    print('===================================================================')

    return status
Пример #6
0
def stop_ib_acme(nodes):
    """Run ``killall ib_stress.sh`` on ``nodes``; always returns 0.

    killall's error output is redirected to /dev/null.
    """
    kill_cmd = 'killall ib_stress.sh 2>/dev/null'
    ssa_tools_utils.pdsh_run(nodes, kill_cmd)
    return 0
Пример #7
0
def stop_counters(nodes):
    """Run ``killall server_counters.sh`` on ``nodes``; always returns 0.

    killall's error output is redirected to /dev/null.
    """
    kill_cmd = 'killall server_counters.sh 2>/dev/null'
    ssa_tools_utils.pdsh_run(nodes, kill_cmd)
    return 0
Пример #8
0
def start_counters(nodes):
    """Start ``server_counters.sh`` in the background on ``nodes``.

    LOG_DIR is created first; the script is then launched with the PIDs
    matched by ``pgrep "ibacm|ibssa|opensm"`` and COUNTER_DELAY as its
    arguments, with output redirected to a per-host counters log inside
    LOG_DIR.

    Always returns 0.
    """
    ssa_tools_utils.pdsh_run(nodes, 'mkdir -p %s' % LOG_DIR)

    log_path = '%s/`hostname`_counters.log' % LOG_DIR
    launch_cmd = (
        'nohup %s/server_counters.sh `pgrep "ibacm|ibssa|opensm"` %d > %s &'
        % (SSA_SCRIPTS, COUNTER_DELAY, log_path))
    ssa_tools_utils.pdsh_run(nodes, launch_cmd)
    return 0
Пример #9
0
        rch_global_dict['exclude_tests'].append(options.exclude.split(','))
else:
    tests = options.include.split(',')


# Optional reinstall step: run ssa_install.sh on all nodes, then start the
# setup for the configured topology.
if options.reinstall:
    # Installer arguments: source location plus the scratch folder, or a
    # single blank when no sources were given.
    if options.ssa_sources:
        sources = "%s %s" % (options.ssa_sources, rch_global_dict['scretch_folder'])
    else:
        sources = " "

    # COVERAGE is exported in front of the installer; sudo -E keeps it.
    coverage = "export COVERAGE=1" if options.coverage else "export COVERAGE=0"

    install_cmd = '%s; sudo -E %s/ssa_install.sh %s' % (coverage, ssa_tools_utils.SSA_HOME, sources)
    print(install_cmd)
    ssa_tools_utils.pdsh_run(rch_global_dict['nodes'], install_cmd)
    setup(rch_global_dict['topology'], 'start')

gDictParams = {}

# Check and save setup: the value returned by setup(..., 'status') is opened
# as a file; the context manager closes the handle once the lines are read.
with open(setup(rch_global_dict['topology'], 'status'), 'r') as setup_status_file:
    ssa_setup = setup_status_file.readlines()
# Keep everything from the check_setup banner line to the end of the file.
# list.index raises ValueError when the banner line is not present.
rch_global_dict['ssa_setup'] = ''.join(ssa_setup[ssa_setup.index('*************  check_setup ********************\n'):])
if not options.restart:
    for i in ssa_setup:
        if i.find('STOPPED') >= 0:
            print i.rstrip('\n')
            status = 1
    if status:
        print 'Not all nodes are running'