Exemplo n.º 1
0
def run_mpifxcorr(rootname,
                  np,
                  mpi=None,
                  machine_file=None,
                  mpifxcorr=None,
                  input_file=None,
                  timeout=None):
    """
    Build the MPI command line for mpifxcorr and run it through spawn().

    rootname      base name used to derive the default machine and input
                  file names
    np            number of processes, passed to the launcher as -np
    mpi           MPI launcher command; observation.mpi when None
    machine_file  machine file path; <rootname>.machine (absolute) when None
    mpifxcorr     correlator executable; observation.mpifxcorr when None
    input_file    input file path; <rootname>.input (absolute) when None
    timeout       timeout handed to spawn(); observation.mpifxcorr_timeout
                  when None
    """
    # Only derive the file names from rootname when the caller did not
    # supply them, so explicit arguments are honoured.
    if machine_file is None:
        machine_file = os.path.abspath(rootname + '.machine')
    if input_file is None:
        input_file = os.path.abspath(rootname + '.input')

    if mpi is None:
        mpi = observation.mpi
    if mpifxcorr is None:
        # The default must land in mpifxcorr itself: it is concatenated
        # into the command line below.
        mpifxcorr = observation.mpifxcorr
    if timeout is None:
        timeout = observation.mpifxcorr_timeout

    check_outfile(input_file)

    import difxlog as log
    command = mpi + ' -nolocal -np ' + str(np) + ' -machinefile ' +\
              machine_file + ' ' + mpifxcorr + ' ' + input_file
    exec_time = get_parameter('EXECUTE TIME (SEC)', input_file)
    # Pass the resolved timeout on instead of a hard-coded value, so the
    # timeout argument / observation default actually takes effect.
    spawn(command, defreg, spawn_func, (time(), exec_time), timeout=timeout)
    log.info('Correlator Finished')
Exemplo n.º 2
0
def spawn(command,
          reg=defreg,
          reclass=spawnClass,
          classobj=spawn_class_obj,
          timeout=None):
    """
    Spawn command and hand every pattern match to a handler object.

    command   the command to spawn
    reg       the regexp, or list of regexps, to pass to pexpect
    reclass   handler class; it is instantiated as reclass(classobj, child)
              and its run(i) method is called with each index returned by
              child.expect()
    classobj  first argument given to the reclass constructor
    timeout   timeout given to child.expect(); observation.spawn_timeout
              is used when this is None

    The loop keeps going for as long as run() returns 0.  The first value
    that is not equal to 0 ends the loop and is returned.
    """
    import difxlog as log
    if timeout is None:
        timeout = observation.spawn_timeout
    log.info('spawning ' + command)
    log.info('timeout  ' + str(timeout) + 's')
    child = pspawn(command)
    handler = reclass(classobj, child)
    while True:
        index = child.expect(reg, timeout)
        status = handler.run(index)
        if status == 0:
            continue
        return status
Exemplo n.º 3
0
def spawn_func(i, child, funcobj):
    """
    Log correlation progress for a matched timestep.

    i        index of the pattern that matched
    child    spawned process; its .before text is matched against
             retimestep
    funcobj  (start_time, exec_time) pair

    Returns 0 after handling index 0, 1 for index 1, and None for any
    other index.
    """
    # Could add some other regexps for detecting errors etc.
    # returning 2 or something like that
    if i == 1:
        return 1
    if i != 0:
        return None

    start_time, exec_time = funcobj
    log.debug(child.before)
    match = retimestep.match(child.before)
    if match:
        step_text = match.group(2)
        timestep = int(step_text)
        # processed time as a percentage of the wall-clock time used so far
        realtime = int(100. * timestep / (time() - start_time))
        percent_done = str(timestep * 100 / int(exec_time))
        log.info('Timestep ' + step_text + ' / ' + exec_time + '.  ' +
                 percent_done + '% completed in ' + str(realtime) +
                 '% realtime.')
    return 0
Exemplo n.º 4
0
 def run(self, i):
     """
     Log one progress line for each timestep reported by the child.

     i is the index of the pattern that matched.  Index 0 is handled as a
     possible timestep report and returns 0; index 1 returns 1; any other
     index returns None.

     Reads self.child, self.start_time, self.last_time,
     self.last_timestep, self.int_time and self.exec_time, and updates
     self.last_time and self.last_timestep after each logged timestep.
     """
     if i != 0:
         # Could add some other regexps for detecting errors etc.
         # returning 2 or something like that
         if i == 1:
             return 1
         return None

     log.debug(self.child.before)
     match = retimestep.match(self.child.before)
     if not match:
         return 0

     now = time()
     timestep = float(match.group(2))
     if self.last_timestep == -1:
         # last_timestep of -1 means nothing has been reported yet:
         # log zeros for every rate and duration
         elapsed = 0
         current_rate = 0
         remaining = 0
     else:
         since_start = now - self.start_time
         average_rate = timestep / since_start
         if average_rate == 0:
             # avoid dividing by zero in the remaining-time estimate
             average_rate = 1
         elapsed = since_start
         current_rate = self.int_time / (now - self.last_time)
         remaining = (self.exec_time - timestep) / average_rate

     # durations are in seconds; df2hhms is given them as a fraction of a
     # day (86400 s) after adding half a second
     hms_format = "%3d:%02d:%02.0f"
     elapsed_str = hms_format % df2hhms((elapsed + 0.5) / 86400.)
     remaining_str = hms_format % df2hhms((remaining + 0.5) / 86400.)
     percent_done = 100.0 * timestep / self.exec_time
     percent_rate = 100.0 * current_rate + 0.5
     log.info(
         "Completed %9.2f/%9.2fs(%2.0f%%)|Elapsed %s|Remaining %s|%3.0f%%" %
         (timestep, self.exec_time, percent_done, elapsed_str,
          remaining_str, percent_rate))
     self.last_time = now
     self.last_timestep = timestep
     return 0
Exemplo n.º 5
0
def check_threads(machine_file, input_file):
    """
    Work out which machine runs which process and log the assignment.

    The machine file is read one host per line: the first line is the
    manager, the next ACTIVE DATASTREAMS lines are the datastreams and the
    remaining lines are cores.  The first line of the thread file (CORE
    CONF FILENAME in input_file) holds the number of cores after a ':',
    followed by one thread count per line.

    Returns a dict with the keys 'n_processes', 'manager', 'datastreams',
    'cores' and 'threads'.

    Raises RuntimeError when the thread file has fewer thread counts than
    its first line announces.
    """
    # for our datastream there is simply one process per line in the machine file
    # so we could simply use that to work out the number of processes.
    # however this is more thorough.

    log.info('checking how processes are assigned')
    # start at 1 to account for the manager
    n_processes = 1
    # every datastream is one more process
    active_datastreams = int(get_parameter('ACTIVE DATASTREAMS', input_file))
    n_processes += active_datastreams

    datastreams = []
    cores = []
    threads = []
    # the with blocks make sure both files are closed again, also on errors
    with open(machine_file, 'r') as mf:
        thread_file = get_parameter('CORE CONF FILENAME', input_file)
        with open(thread_file, 'r') as tf:
            # note which machine will be the manager
            manager = next(mf).strip()
            # note which machines will be the datastreams
            for _ in range(active_datastreams):
                datastreams.append(next(mf).strip())
            # the first line of the thread file gives the number of cores;
            # then pair each thread count with the next machine
            ncores = int(next(tf).split(':')[1].strip())
            for _ in range(ncores):
                try:
                    threads.append(int(next(tf)))
                except StopIteration:
                    raise RuntimeError('faulty .threads file')
                try:
                    cores.append(next(mf).strip())
                except StopIteration:
                    log.warning(
                        'warning, mismatch in number of cores and number of machines in machine file'
                    )
    n_processes += sum(threads)
    log.info('machine file    ' + machine_file)
    log.info('thread file     ' + thread_file)
    log.info('n_processes     ' + str(n_processes))
    log.info('manager         ' + manager)
    for i in range(active_datastreams):
        log.info('datastream ' + str(i + 1).rjust(4) + ' ' + datastreams[i])
    for i in range(ncores):
        # cores can be shorter than threads when the machine file ran out
        # of hosts (warned about above); do not index past its end
        if i < len(cores):
            core_host = cores[i]
        else:
            core_host = 'unassigned'
        log.info('core ' + str(i + 1).rjust(4) + '       ' +
                 core_host.rjust(2) + ' ' + str(threads[i]).rjust(2) +
                 ' thread(s)')
    return {
        'n_processes': n_processes,
        'manager': manager,
        'datastreams': datastreams,
        'cores': cores,
        'threads': threads
    }
Exemplo n.º 6
0
def check_threads(machine_file, input_file):
    """
    Read the machine file, assign the manager and datastreams to hosts and
    log the result.

    Each machine file line is "<host> [key=value ...]"; text after a '#'
    is ignored and lines without a host are skipped.  A host contributes
    its max_slots value, else its slots value, else 1 process slot.

    Returns np, the total number of slots found in the machine file.

    Raises RuntimeError when np is not larger than ACTIVE DATASTREAMS
    (one slot is needed for the manager on top of the datastreams).
    """
    log.info('checking how processes are assigned')
    # find out the number of datastreams
    active_datastreams = int(get_parameter('ACTIVE DATASTREAMS', input_file))

    # parse machine file; the with block closes it again, also on errors
    mftable = []
    with open(machine_file, 'r') as mf:
        thread_file = get_parameter('CORE CONF FILENAME', input_file)
        # The thread file is never read here; opening it only verifies
        # that it can be opened, so close it straight away.
        open(thread_file, 'r').close()

        for line in mf:
            # drop any comment, then skip lines that hold no host at all
            # (blank lines, whitespace, comment-only lines)
            fields = line.split('#')[0].split()
            if not fields:
                continue
            mfline = {'host': fields[0]}
            for entry in fields[1:]:
                parts = entry.split('=')
                mfline[parts[0]] = int(parts[1])
            mftable.append(mfline)

    # remove unused nodes and add up total number of processes
    np = 0
    nodes = []
    for host in mftable:
        if 'max_slots' in host:
            slots = host['max_slots']
        elif 'slots' in host:
            slots = host['slots']
        else:
            slots = 1
        node = {}
        node['host'] = host['host']
        node['processes'] = []
        node['np'] = slots
        node['ncores'] = slots
        np += slots
        if slots > 0:
            nodes.append(node)
        # NOTE(review): these prints look like leftover debug output;
        # they are kept so that stdout stays the same.
        print(' '.join((node['host'], str(node['np']), str(np))))

    print(nodes)
    # note which machine will be the manager
    # note which machines will be the datastreams
    if not np > active_datastreams:
        raise RuntimeError(
            "Not enough available slots to run FXManager and datastreams")
    manager = None
    datastreams = []
    datastreams_assigned = 0
    # Walk over the nodes round-robin: the first node gets the manager,
    # every later visit to a node assigns the next datastream to it.
    while datastreams_assigned < active_datastreams:
        for node in nodes:
            # will only run once
            if not manager:
                manager = node['host']
                node['ncores'] -= 1
                node['processes'].append('FXManager')
            elif datastreams_assigned < active_datastreams:
                datastreams_assigned += 1
                datastreams.append(node['host'])
                node['ncores'] -= 1
                node['processes'].append('datastream ' +
                                         str(datastreams_assigned))

    log.info('Machine file    ' + machine_file)
    log.info('Thread file     ' + thread_file)
    log.info('np              ' + str(np))

    log.info('Hosts:')
    for node in nodes:
        log.info("    " + node['host'] + ':')
        for process in node['processes']:
            log.info("        " + process)
        # slots left over after the manager/datastreams are reported as cores
        if node['ncores'] > 0:
            log.info("         + " + str(node['ncores']) + " core(s)")
        log.info('')
    return np
Exemplo n.º 7
0
def run_mpifxcorr(rootname,
                  np,
                  mpi=None,
                  machine_file=None,
                  mpifxcorr=None,
                  input_file=None,
                  timeout=None):
    """
    Build the MPI command line for mpifxcorr, log the main correlation
    parameters from the input file and run the command through spawn().

    rootname      base name used to derive the default machine and input
                  file names
    np            number of processes, passed to the launcher as -np
    mpi           MPI launcher command; observation.mpi when None
    machine_file  machine file path; <rootname>.machine (absolute) when None
    mpifxcorr     correlator executable; observation.mpifxcorr when None
    input_file    input file path; <rootname>.input (absolute) when None
    timeout       timeout handed to spawn(); observation.mpifxcorr_timeout
                  when None
    """
    import os

    # Both file arguments default to None, which cannot be concatenated
    # into the command line: fall back to the names derived from rootname.
    if machine_file is None:
        machine_file = os.path.abspath(rootname + '.machine')
    if input_file is None:
        input_file = os.path.abspath(rootname + '.input')

    if mpi is None:
        mpi = observation.mpi
    if mpifxcorr is None:
        mpifxcorr = observation.mpifxcorr
    if timeout is None:
        timeout = observation.mpifxcorr_timeout

    check_outfile(input_file)

    import difxlog as log
    command = mpi + ' -np ' + str(np) +\
              ' -machinefile ' + machine_file + ' ' +\
              ' -byslot ' +\
              mpifxcorr + ' ' + input_file
    exec_time = get_parameter('EXECUTE TIME (SEC)', input_file)
    int_time = float(get_parameter('INT TIME (SEC)', input_file))
    log.info('Int time           = ' + str(int_time))
    # remaining parameters are logged exactly as found in the input file
    logged_parameters = (
        ('num channels       = ', 'NUM CHANNELS'),
        ('Vis Buffer Length  = ', 'VIS BUFFER LENGTH'),
        ('Blocks Per Send    = ', 'BLOCKS PER SEND'),
        ('Data Buffer Factor = ', 'DATA BUFFER FACTOR'),
        ('num Data Segments  = ', 'NUM DATA SEGMENTS'),
    )
    for label, parameter in logged_parameters:
        log.info(label + get_parameter(parameter, input_file))
    spawn(command,
          defreg,
          mpifxcorrSpawnClass, (time(), exec_time, int_time),
          timeout=timeout)
    log.info('Correlator Finished')