Exemplo n.º 1
0
def sig_handler(signum, frame):
    # for some reason, I (rmb) was unable to handle TSTP and CONT in the same way
    global manSocket
    if signum == SIGINT:
        raise mpdrunInterrupted, 'SIGINT'
    elif signum == SIGTSTP:
        raise mpdrunInterrupted, 'SIGTSTP'
    elif signum == SIGCONT:
        if manSocket:
            msgToSend = {'cmd': 'signal', 'signo': 'SIGCONT'}
            mpd_send_one_msg(manSocket, msgToSend)
    elif signum == SIGALRM:
        raise mpdrunInterrupted, 'SIGALRM'
Exemplo n.º 2
0
def get_vals_for_attach():
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile, \
           try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    global singinitPID, singinitPORT, doingBNR, myHost, myIP

    sjobid = gdbAttachJobid.split('@')  # jobnum and originating host
    msgToSend = {'cmd': 'mpdlistjobs'}
    mpd_send_one_msg(conSocket, msgToSend)
    msg = recv_one_msg_with_timeout(conSocket, 5)
    if not msg:
        mpd_raise('no msg recvd from mpd before timeout')
    if msg['cmd'] != 'local_mpdid':  # get full id of local mpd for filters later
        mpd_raise(
            'did not recv local_mpdid msg from local mpd; instead, recvd: %s' %
            msg)
    else:
        if len(sjobid) == 1:
            sjobid.append(msg['id'])
    got_info = 0
    while 1:
        msg = mpd_recv_one_msg(conSocket)
        if not msg.has_key('cmd'):
            print 'mpdlistjobs: INVALID msg=:%s:' % (msg)
            exit(-1)
        if msg['cmd'] == 'mpdlistjobs_info':
            got_info = 1
            smjobid = msg['jobid'].split(
                '  ')  # jobnum, mpdid, and alias (if present)
            if sjobid[0] == smjobid[0] and sjobid[1] == smjobid[
                    1]:  # jobnum and mpdid
                rank = int(msg['rank'])
                users[(rank, rank)] = msg['username']
                hosts[(rank, rank)] = msg['host']
                execs[(rank, rank)] = msg['pgm']
                cwds[(rank, rank)] = cwd
                paths[(rank, rank)] = environ['PATH']
                args[(rank, rank)] = [msg['clipid']]
                envvars[(rank, rank)] = {}
                limits[(rank, rank)] = {}
        elif msg['cmd'] == 'mpdlistjobs_trailer':
            if not got_info:
                print 'no info on this jobid; probably invalid'
                exit(-1)
            break
        else:
            print 'invaild msg from mpd :%s:' % (msg)
            exit(-1)
    nprocs = len(execs.keys())  # all dicts are the same len here
Exemplo n.º 3
0
        elif argv[i] == '-g':
            single_or_group = 'g'
        else:
            print '** unrecognized arg: %s' % (argv[i])
            usage()
        i += 1
    msgToSend = {
        'cmd': 'mpdsigjob',
        'sigtype': sigtype,
        'jobnum': jobnum,
        'mpdid': mpdid,
        'jobalias': jobalias,
        's_or_g': single_or_group,
        'username': username
    }
    mpd_send_one_msg(conSocket, msgToSend)
    msg = recv_one_msg_with_timeout(conSocket, 5)
    if not msg:
        mpd_raise('no msg recvd from mpd before timeout')
    if msg['cmd'] != 'mpdsigjob_ack':
        if msg['cmd'] == 'already_have_a_console':
            print 'mpd already has a console (e.g. for long ringtest); try later'
        else:
            print 'unexpected message from mpd: %s' % (msg)
        exit(-1)
    if not msg['handled']:
        print 'job not found'
        exit(-1)
    conSocket.close()

Exemplo n.º 4
0
def mpdboot():
    global myHost, fullDirName, topMPDBoot, user
    mpd_set_my_id('mpdboot_rank_notset')
    fullDirName = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName, 'mpd.py')
    mpdbootCmd = path.join(fullDirName, 'mpdboot.py')
    hostsFilename = 'mpd.hosts'
    totalNum = 1  # may get chgd below
    debug = 0
    verbosity = 0
    localConsoleArg = ''
    remoteConsoleArg = ''
    myConsoleVal = ''
    oneMPDPerHost = 1
    entryHost = ''
    entryPort = ''
    topMPDBoot = 1
    myHost = gethostname()
    myNcpus = 1
    myIfhn = ''
    try:
        shell = path.split(environ['SHELL'])[-1]
    except:
        shell = 'csh'

    argidx = 1  # skip arg 0
    while argidx < len(argv):
        if argv[argidx] == '-h' or argv[argidx] == '--help':
            usage()
        elif argv[argidx] == '-zentry':  # entry host and port
            if ':' not in argv[argidx + 1]:
                print 'invalid pair of entry host and entry port for -zentry option'
                usage()
            (entryHost, entryPort) = argv[argidx + 1].split(':')
            try:
                ip = gethostbyname_ex(entryHost)[2]  # may fail if invalid host
            except:
                print 'invalid entry host ', entryHost
                stdout.flush()
                usage()
            if not entryPort.isdigit():
                print 'invalid (nonumeric) entry port ', entryPort
                stdout.flush()
                usage()
            entryHost = entryHost
            entryPort = entryPort
            argidx += 2
        elif argv[argidx] == '-zrank':
            topMPDBoot = 0
            myBootRank = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx] == '-zhosts':
            zhosts = argv[argidx + 1]
            zhosts = zhosts.split(',')
            hostsAndInfo = []
            for zhost in zhosts:
                (host, ncpus, ifhn) = zhost.split(':')
                hostsAndInfo.append({
                    'host': host,
                    'ncpus': ncpus,
                    'ifhn': ifhn
                })
            argidx += 2
        elif argv[argidx] == '-r':  # or --rsh=
            rshCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--rsh'):
            splitArg = argv[argidx].split('=')
            try:
                rshCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-u':  # or --user=
            user = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--user'):
            splitArg = argv[argidx].split('=')
            try:
                user = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-m':  # or --mpd=
            mpdCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--mpd'):
            splitArg = argv[argidx].split('=')
            try:
                mpdCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-f':  # or --file=
            hostsFilename = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--file'):
            splitArg = argv[argidx].split('=')
            try:
                hostsFilename = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ncpus'):
            splitArg = argv[argidx].split('=')
            try:
                myNcpus = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ifhn'):
            splitArg = argv[argidx].split('=')
            myIfhn = splitArg[1]
            myHost = splitArg[1]
            argidx += 1
        elif argv[argidx] == '-n':  # or --totalnum=
            totalNum = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx].startswith('--totalnum'):
            splitArg = argv[argidx].split('=')
            try:
                totalNum = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-d' or argv[argidx] == '--debug':
            debug = 1
            argidx += 1
        elif argv[argidx] == '-s' or argv[argidx] == '--shell':
            shell = 'bourne'
            argidx += 1
        elif argv[argidx] == '-v' or argv[argidx] == '--verbose':
            verbosity = 1
            argidx += 1
        elif argv[argidx] == '-1':
            oneMPDPerHost = 0
            argidx += 1
        elif argv[argidx] == '--loccons':
            localConsoleArg = '--loccons'
            argidx += 1
        elif argv[argidx] == '--remcons':
            remoteConsoleArg = '--remcons'
            argidx += 1
        else:
            print 'mpdboot: unrecognized argument:', argv[argidx]
            usage()

    if topMPDBoot:
        lines = []
        if totalNum > 1:
            try:
                f = open(hostsFilename, 'r')
                for line in f:
                    lines.append(line)
            except:
                print 'unable to open (or read) hostsfile %s' % (hostsFilename)
                exit(-1)
        hostsAndInfo = [{'host': myHost, 'ncpus': myNcpus, 'ifhn': myIfhn}]
        for line in lines:
            line = line.strip()
            if not line or line[0] == '#':
                continue
            splitLine = re.split(r'\s+', line)
            host = splitLine[0]
            ncpus = 1  # default
            if ':' in host:
                (host, ncpus) = host.split(':', 1)
                ncpus = int(ncpus)
            ifhn = ''  # default
            for kv in splitLine[1:]:
                (k, v) = kv.split('=', 1)
                if k == 'ifhn':
                    ifhn = v
            hostsAndInfo.append({'host': host, 'ncpus': ncpus, 'ifhn': ifhn})
        if oneMPDPerHost and totalNum > 1:
            oldHosts = hostsAndInfo[:]
            hostsAndInfo = []
            for x in oldHosts:
                keep = 1
                for y in hostsAndInfo:
                    if mpd_same_ips(x['host'], y['host']):
                        keep = 0
                        break
                if keep:
                    hostsAndInfo.append(x)
        if len(hostsAndInfo) < totalNum:  # one is local
            print 'totalNum=%d  num hosts=%d' % (totalNum, len(hostsAndInfo))
            print 'there are not enough hosts on which to start all processes'
            exit(-1)
        myBootRank = 0
        if localConsoleArg:
            myConsoleVal = '-n'
    else:
        if remoteConsoleArg:
            myConsoleVal = '-n'
    anMPDalreadyHere = 0
    for i in range(myBootRank):
        if mpd_same_ips(hostsAndInfo[i]['host'],
                        myHost):  # if one before me on this host
            myConsoleVal = '-n'
            anMPDalreadyHere = 1
            break
    if not anMPDalreadyHere:
        try:
            system('%s/mpdallexit.py > /dev/null' %
                   (fullDirName))  # stop any current mpds
        except:
            pass

    mpd_set_my_id('mpdboot_%s_%d' % (myHost, myBootRank))
    if debug:
        mpd_print(1, 'starting')
    (parent, lchild,
     rchild) = mpd_get_ranks_in_binary_tree(myBootRank, totalNum)
    if debug:
        mpd_print(1, 'p=%d l=%d r=%d' % (parent, lchild, rchild))

    if myIfhn:
        ifhnVal = '--if %s' % (myIfhn)
    elif hostsAndInfo[myBootRank]['ifhn']:
        ifhnVal = '--if %s' % (hostsAndInfo[myBootRank]['ifhn'])
    else:
        ifhnVal = ''
    if entryHost:
        cmd = '%s %s -h %s -p %s -d -e --ncpus %s %s' % \
       (mpdCmd,myConsoleVal,entryHost,entryPort,myNcpus,ifhnVal)
    else:
        cmd = '%s %s -d -e --ncpus %s %s' % \
       (mpdCmd,myConsoleVal,myNcpus,ifhnVal)
    if verbosity:
        mpd_print(1, 'starting local mpd on %s' % (myHost))
    if debug:
        mpd_print(1, 'cmd to run local mpd = :%s:' % (cmd))

    if not access(mpdCmd, X_OK):
        err_exit('cannot access mpd cmd :%s:' % (mpdCmd))
    locMPD = Popen4(cmd, 0)
    locMPDFD = locMPD.fromchild
    locMPDPort = locMPDFD.readline().strip()
    if locMPDPort.isdigit():
        # can't do this until he's already in his ring
        locMPDSocket = mpd_get_inet_socket_and_connect(myHost, int(locMPDPort))
        if locMPDSocket:
            msgToSend = {
                'cmd': 'ping',
                'host': 'ping',
                'port': 0
            }  # dummy host & port
            mpd_send_one_msg(locMPDSocket, {
                'cmd': 'ping',
                'host': myHost,
                'port': 0
            })
            msg = mpd_recv_one_msg(locMPDSocket)  # RMB: WITH TIMEOUT ??
            if not msg or not msg.has_key('cmd') or msg['cmd'] != 'ping_ack':
                err_exit(
                    '%d: unable to ping local mpd; invalid msg from mpd :%s:' %
                    (myBootRank, msg))
            locMPDSocket.close()
        else:
            err_exit('failed to connect to mpd')
    else:
        err_exit('%d: invalid port from mpd %s' %
                 (myBootRank, str(locMPDPort)))

    if not entryHost:
        entryHost = myHost
        entryPort = locMPDPort

    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    lfd = 0
    rfd = 0
    fdsToSelect = []
    if debug:
        debugArg = '-d'
    else:
        debugArg = ''
    if verbosity:
        verboseArg = '-v'
    else:
        verboseArg = ''
    if lchild >= 0:
        zhosts = [
            "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn'])
            for h in hostsAndInfo
        ]
        if hostsAndInfo[lchild]['ifhn']:
            ifhnVal = '--ifhn=%s' % (hostsAndInfo[lchild]['ifhn'])
        else:
            ifhnVal = ''
        cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \
              (rshCmd, xOpt, hostsAndInfo[lchild]['host'], mpdbootCmd,
               hostsAndInfo[lchild]['ncpus'],ifhnVal,
        rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost,
        entryPort, lchild,
        ','.join(zhosts) )
        if verbosity:
            mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[lchild]))
        if debug:
            mpd_print(1, 'cmd to run lchild boot = :%s:' % (cmd))
        lchildMPDBoot = Popen4(cmd, 0)
        lfd = lchildMPDBoot.fromchild
        fdsToSelect.append(lfd)
    if rchild >= 0:
        zhosts = [
            "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn'])
            for h in hostsAndInfo
        ]
        if hostsAndInfo[rchild]['ifhn']:
            ifhnVal = '--ifhn=%s' % (hostsAndInfo[rchild]['ifhn'])
        else:
            ifhnVal = ''
        cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \
              (rshCmd, xOpt, hostsAndInfo[rchild]['host'], mpdbootCmd,
               hostsAndInfo[rchild]['ncpus'],ifhnVal,
        rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost,
        entryPort, rchild,
        ','.join(zhosts) )
        if verbosity:
            mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[rchild]))
        if debug:
            mpd_print(1, 'cmd to run rchild boot = :%s:' % (cmd))
        rchildMPDBoot = Popen4(cmd, 0)
        rfd = rchildMPDBoot.fromchild
        fdsToSelect.append(rfd)

    lfd_first_line = 1
    rfd_first_line = 1
    while fdsToSelect:
        try:
            (readyFDs, unused1, unused2) = select(fdsToSelect, [], [], 0.1)
        except error, errmsg:
            mpd_raise('mpdboot: select failed: errmsg=:%s:' % (errmsg))
        if lfd and lfd in readyFDs:
            line = lfd.readline()
            if line:
                if line.find('RC=MPDBOOT_ERREXIT') >= 0:
                    err_exit('RC=MPDBOOT_ERREXIT')
                else:
                    if not verbosity and lfd_first_line:
                        lfd_first_line = 0
                        mpd_print(
                            1,
                            "error trying to start mpd(boot) at %d %s; output:"
                            % (lchild, hostsAndInfo[lchild]))
                    print '  ', line,
                    stdout.flush()
            else:
                lfd.close()
                fdsToSelect.remove(lfd)
        if rfd and rfd in readyFDs:
            line = rfd.readline()
            if line:
                if line.find('RC=MPDBOOT_ERREXIT') >= 0:
                    err_exit('RC=MPDBOOT_ERREXIT')
                else:
                    if not verbosity and rfd_first_line:
                        rfd_first_line = 0
                        mpd_print(
                            1,
                            "error trying to start mpd(boot) at %d %s; output:"
                            % (rchild, hostsAndInfo[rchild]))
                    print '  ', line,
                    stdout.flush()
            else:
                rfd.close()
                fdsToSelect.remove(rfd)
Exemplo n.º 5
0
     mpd_send_one_line(conSocket, msgToSend)
 mpdid = ''
 if argv[1] == '-a':
     jobalias = argv[2]
     jobnum = '0'
 else:
     jobalias = ''
     jobid = argv[1]
     sjobid = jobid.split('@')
     jobnum = sjobid[0]
     if len(sjobid) > 1:
         mpdid = sjobid[1]
 mpd_send_one_msg(
     conSocket, {
         'cmd': 'mpdkilljob',
         'jobnum': jobnum,
         'mpdid': mpdid,
         'jobalias': jobalias,
         'username': username
     })
 msg = recv_one_msg_with_timeout(conSocket, 5)
 if not msg:
     mpd_raise('no msg recvd from mpd before timeout')
 if msg['cmd'] != 'mpdkilljob_ack':
     if msg['cmd'] == 'already_have_a_console':
         print 'mpd already has a console (e.g. for long ringtest); try later'
     else:
         print 'unexpected message from mpd: %s' % (msg)
     exit(-1)
 if not msg['handled']:
     print 'job not found'
     exit(-1)
Exemplo n.º 6
0
def get_args_from_file():
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile, \
           try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    global singinitPID, singinitPORT, doingBNR, myHost, myIP

    try:
        argsFile = open(argsFilename, 'r')
    except:
        print 'could not open job specification file %s' % (argsFilename)
        myExitStatus = -1  # used in main
        exit(myExitStatus)  # really forces jump back into main
    file_contents = argsFile.read()
    if delArgsFile:
        unlink(argsFilename)
    try:
        from xml.dom.minidom import parseString  #import only if needed
    except:
        print 'need xml parser like xml.dom.minidom'
        myExitStatus = -1  # used in main
        exit(myExitStatus)  # really forces jump back into main
    parsedArgs = parseString(file_contents)
    if parsedArgs.documentElement.tagName != 'create-process-group':
        print 'expecting create-process-group; got unrecognized doctype: %s' % \
              (parsedArgs.documentElement.tagName)
        myExitStatus = -1  # used in main
        exit(myExitStatus)  # really forces jump back into main
    createReq = parsedArgs.getElementsByTagName('create-process-group')[0]
    if createReq.hasAttribute('totalprocs'):
        nprocs = int(createReq.getAttribute('totalprocs'))
    else:
        print '** totalprocs not specified in %s' % argsFilename
        myExitStatus = -1  # used in main
        exit(myExitStatus)  # really forces jump back into main
    if createReq.hasAttribute('dont_try_0_locally'):
        try0Locally = 0
    if createReq.hasAttribute('output')  and  \
       createReq.getAttribute('output') == 'label':
        lineLabels = 1
    if createReq.hasAttribute('net_interface'):
        myHost = createReq.getAttribute('net_interface')
        myIfhn = myHost
    if createReq.hasAttribute('pgid'):  # our jobalias
        jobAlias = createReq.getAttribute('pgid')
    if createReq.hasAttribute('stdin_goes_to_who'):
        stdinGoesToWho = createReq.getAttribute('stdin_goes_to_who')
    if createReq.hasAttribute('doing_bnr'):
        doingBNR = int(createReq.getAttribute('doing_bnr'))
    if createReq.hasAttribute('gdb'):
        gdb = int(createReq.getAttribute('gdb'))
        if gdb:
            mergingOutput = 1  # implied
            lineLabels = 1  # implied
            stdinGoesToWho = 'all'  # chgd to 0 - nprocs-1 when nprocs avail
    if createReq.hasAttribute('tv'):
        totalview = int(createReq.getAttribute('tv'))

    nextHost = 0
    hostSpec = createReq.getElementsByTagName('host-spec')
    if hostSpec:
        for node in hostSpec[0].childNodes:
            node = node.data.strip()
            hostnames = findall(r'\S+', node)
            for hostname in hostnames:
                if hostname:  # some may be the empty string
                    try:
                        ipaddr = gethostbyname_ex(hostname)[2][0]
                    except:
                        print 'unable to determine IP info for host %s' % (
                            hostname)
                        myExitStatus = -1  # used in main
                        exit(myExitStatus)  # really forces jump back into main
                    if ipaddr.startswith('127.0.0'):
                        hostList.append(myHost)
                    else:
                        hostList.append(ipaddr)
    if hostSpec and hostSpec[0].hasAttribute('check'):
        hostSpecMode = hostSpec[0].getAttribute('check')
        if hostSpecMode == 'yes':
            msgToSend = {'cmd': 'verify_hosts_in_ring', 'host_list': hostList}
            mpd_send_one_msg(conSocket, msgToSend)
            msg = recv_one_msg_with_timeout(conSocket, 5)
            if not msg:
                mpd_raise('no msg recvd from mpd mpd during chk hosts up')
            elif msg['cmd'] != 'verify_hosts_in_ring_response':
                mpd_raise('unexpected msg from mpd :%s:' % (msg))
            if msg['host_list']:
                print 'These hosts are not in the mpd ring:'
                for host in msg['host_list']:
                    if host[0].isdigit():
                        print '    %s' % (host),
                        try:
                            print ' (%s)' % (gethostbyaddr(host)[0])
                        except:
                            print ''
                    else:
                        print '    %s' % (host)
                myExitStatus = -1  # used in main
                exit(myExitStatus)  # really forces jump back into main

    covered = [0] * nprocs
    procSpec = createReq.getElementsByTagName('process-spec')
    if not procSpec:
        print 'No process-spec specified'
        usage()
    for p in procSpec:
        if p.hasAttribute('range'):
            therange = p.getAttribute('range')
            splitRange = therange.split('-')
            if len(splitRange) == 1:
                loRange = int(splitRange[0])
                hiRange = loRange
            else:
                (loRange, hiRange) = (int(splitRange[0]), int(splitRange[1]))
        else:
            (loRange, hiRange) = (0, nprocs - 1)
        for i in xrange(loRange, hiRange + 1):
            if i >= nprocs:
                print '*** exiting; rank %d is greater than nprocs for args' % (
                    i)
                myExitStatus = -1  # used in main
                exit(myExitStatus)  # really forces jump back into main
            if covered[i]:
                print '*** exiting; rank %d is doubly used in proc specs' % (i)
                myExitStatus = -1  # used in main
                exit(myExitStatus)  # really forces jump back into main
            covered[i] = 1
        if p.hasAttribute('exec'):
            execs[(loRange, hiRange)] = p.getAttribute('exec')
        else:
            print '*** exiting; range %d-%d has no exec' % (loRange, hiRange)
            myExitStatus = -1  # used in main
            exit(myExitStatus)  # really forces jump back into main
        if p.hasAttribute('user'):
            tempuser = p.getAttribute('user')
            try:
                pwent = getpwnam(tempuser)
            except:
                pwent = None
            if not pwent:
                print tempuser, 'is an invalid username'
                myExitStatus = -1  # used in main
                exit(myExitStatus)  # really forces jump back into main
            if tempuser == username or getuid() == 0:
                users[(loRange, hiRange)] = p.getAttribute('user')
            else:
                print tempuser, 'username does not match yours and you are not root'
                myExitStatus = -1  # used in main
                exit(myExitStatus)  # really forces jump back into main
        else:
            users[(loRange, hiRange)] = username
        if p.hasAttribute('cwd'):
            cwds[(loRange, hiRange)] = p.getAttribute('cwd')
        else:
            cwds[(loRange, hiRange)] = cwd
        if p.hasAttribute('path'):
            paths[(loRange, hiRange)] = p.getAttribute('path')
        else:
            paths[(loRange, hiRange)] = environ['PATH']
        if p.hasAttribute('host'):
            host = p.getAttribute('host')
            if host.startswith('_any_'):
                hosts[(loRange, hiRange)] = host
            else:
                try:
                    hosts[(loRange, hiRange)] = gethostbyname_ex(host)[2][0]
                except:
                    print 'unable to do find info for host %s' % (host)
                    myExitStatus = -1  # used in main
                    exit(myExitStatus)  # really forces jump back into main
        else:
            if hostList:
                hosts[(loRange, hiRange)] = '_any_from_pool_'
            else:
                hosts[(loRange, hiRange)] = '_any_'

        argDict = {}
        argList = p.getElementsByTagName('arg')
        for argElem in argList:
            argDict[int(
                argElem.getAttribute('idx'))] = argElem.getAttribute('value')
        argVals = [0] * len(argList)
        for i in argDict.keys():
            argVals[i - 1] = unquote(argDict[i])
        args[(loRange, hiRange)] = argVals

        limitDict = {}
        limitList = p.getElementsByTagName('limit')
        for limitElem in limitList:
            type = limitElem.getAttribute('type')
            if type in known_rlimit_types:
                limitDict[type] = limitElem.getAttribute('value')
            else:
                print 'mpdrun: invalid type in limit: %s' % (type)
                myExitStatus = -1  # used in main
                exit(myExitStatus)  # really forces jump back into main
        limits[(loRange, hiRange)] = limitDict

        envVals = {}
        envVarList = p.getElementsByTagName('env')
        for envVarElem in envVarList:
            envkey = envVarElem.getAttribute('name')
            envval = envVarElem.getAttribute('value')
            envVals[envkey] = envval
        envvars[(loRange, hiRange)] = envVals
Exemplo n.º 7
0
def mpdrun():
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile, \
           try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    global singinitPID, singinitPORT, doingBNR, myHost, myIP, myIfhn

    mpd_set_my_id('mpdrun_' + ` getpid() `)
    pgm = ''
    mship = ''
    rship = ''
    nprocs = 0
    jobAlias = ''
    argsFilename = ''
    outExitCodesFilename = ''
    outXmlFile = ''
    outXmlDoc = ''
    outXmlEC = ''
    delArgsFile = 0
    try0Locally = 1
    lineLabels = 0
    stdinGoesToWho = '0'
    mergingOutput = 0
    hostList = []
    gdb = 0
    gdbAttachJobid = ''
    singinitPID = 0
    singinitPORT = 0
    doingBNR = 0
    totalview = 0
    myHost = gethostname()  # default; may be chgd by -if arg
    myIfhn = ''
    known_rlimit_types = [
        'core', 'cpu', 'fsize', 'data', 'stack', 'rss', 'nproc', 'nofile',
        'ofile', 'memlock', 'as', 'vmem'
    ]
    username = mpd_get_my_username()
    cwd = path.abspath(getcwd())
    recvTimeout = 20

    execs = {}
    users = {}
    cwds = {}
    paths = {}
    args = {}
    envvars = {}
    limits = {}
    hosts = {}

    get_args_from_cmdline(
    )  # verify args as much as possible before connecting to mpd

    (listenSocket, listenPort) = mpd_get_inet_listen_socket('', 0)
    signal(SIGALRM, sig_handler)
    if environ.has_key('MPDRUN_TIMEOUT'):
        jobTimeout = int(environ['MPDRUN_TIMEOUT'])
    elif environ.has_key('MPIEXEC_TIMEOUT'):
        jobTimeout = int(environ['MPIEXEC_TIMEOUT'])
    else:
        jobTimeout = 0
    if environ.has_key('MPIEXEC_BNR'):
        doingBNR = 1
    if environ.has_key('UNIX_SOCKET'):
        conFD = int(environ['UNIX_SOCKET'])
        conSocket = fromfd(conFD, AF_UNIX, SOCK_STREAM)
        close(conFD)
    else:
        if environ.has_key('MPD_CON_EXT'):
            conExt = '_' + environ['MPD_CON_EXT']
        else:
            conExt = ''
        consoleName = '/tmp/mpd2.console_' + username + conExt
        conSocket = socket(AF_UNIX, SOCK_STREAM)  # note: UNIX socket
        try:
            conSocket.connect(consoleName)
        except Exception, errmsg:
            print 'cannot connect to local mpd (%s); possible causes:' % consoleName
            print '    1. no mpd running on this host'
            print '    2. mpd is running but was started without a "console" (-n option)'
            print 'you can start an mpd with the "mpd" command; to get help, run:'
            print '    mpd -h'
            myExitStatus = -1  # used in main
            exit(myExitStatus)  # really forces jump back into main
            # mpd_raise('cannot connect to local mpd; errmsg: %s' % (str(errmsg)) )
        msgToSend = 'realusername=%s\n' % username
        mpd_send_one_line(conSocket, msgToSend)
        msgToSend = {'cmd': 'get_mpd_version'}
        mpd_send_one_msg(conSocket, msgToSend)
        msg = recv_one_msg_with_timeout(conSocket, recvTimeout)
        if not msg:
            mpd_raise('no msg recvd from mpd during version check')
        elif msg['cmd'] != 'mpd_version_response':
            mpd_raise('unexpected msg from mpd :%s:' % (msg))
        if msg['mpd_version'] != mpd_version:
            mpd_raise('mpd version %s does not match mine %s' %
                      (msg['mpd_version'], mpd_version))
Exemplo n.º 8
0
    }
    if try0Locally:
        msgToSend['try_0_locally'] = 1
    if lineLabels:
        msgToSend['line_labels'] = 1
    if rship:
        msgToSend['rship'] = rship
        msgToSend['mship_host'] = gethostname()
        msgToSend['mship_port'] = mshipPort
    if doingBNR:
        msgToSend['doing_bnr'] = 1
    if stdinGoesToWho == 'all':
        stdinGoesToWho = '0-%d' % (nprocs - 1)
    msgToSend['stdin_goes_to_who'] = stdinGoesToWho

    mpd_send_one_msg(conSocket, msgToSend)
    msg = recv_one_msg_with_timeout(conSocket, recvTimeout)
    if not msg:
        mpd_raise('no msg recvd from mpd when expecting ack of request')
    elif msg['cmd'] == 'mpdrun_ack':
        currRingSize = msg['ringsize']
        currRingNCPUs = msg['ring_ncpus']
    else:
        if msg['cmd'] == 'already_have_a_console':
            print 'mpd already has a console (e.g. for long ringtest); try later'
            myExitStatus = -1  # used in main
            exit(myExitStatus)  # really forces jump back into main
        elif msg['cmd'] == 'job_failed':
            if msg['reason'] == 'some_procs_not_started':
                print 'mpdrun: unable to start all procs; may have invalid machine names'
                print '    remaining specified hosts:'
Exemplo n.º 9
0
            conExt = ''
        consoleName = '/tmp/mpd2.console_' + username + conExt
        conSocket = socket(AF_UNIX, SOCK_STREAM)             # note: UNIX socket
        try:
            conSocket.connect(consoleName)
        except Exception, errmsg:
            print 'mpdallexit: cannot connect to local mpd (%s); possible causes:' % consoleName
            print '    1. no mpd running on this host'
            print '    2. mpd is running but was started without a "console" (-n option)'
	    print 'you can start an mpd with the "mpd" command; to get help, run:'
	    print '    mpd -h'
            exit(-1)
            # mpd_raise('cannot connect to local mpd; errmsg: %s' % (str(errmsg)) )
        msgToSend = 'realusername=%s\n' % username
        mpd_send_one_line(conSocket,msgToSend)
    mpd_send_one_msg(conSocket, {'cmd':'mpdallexit'})
    msg = recv_one_msg_with_timeout(conSocket,5)
    if not msg:
        mpd_raise('no msg recvd from mpd before timeout')
    if msg['cmd'] != 'mpdallexit_ack':
        if msg['cmd'] == 'already_have_a_console':
            print 'mpd already has a console (e.g. for long ringtest); try later'
            exit(-1)
        elif msg['cmd'] == 'invalid_username_to_make_this_request':
            print 'you can not stop this mpd; it must have been started by root'
            exit(-1)
        else:
            print 'mpdallexit failed: unexpected message from mpd: %s' % (msg)
            exit(-1)
    conSocket.close()