Example 1
    def mem_update(self, vms_params, size, speed):
        """Copy, compile memtouch, calibrate and return memtouch action """
        vms = [Host(vm_param['ip'] + '.g5k') for vm_param in vms_params]
        logger.info('VMS: %s', pformat(vms))

        files = ['memtouch/memtouch-with-busyloop3.c']
        putfiles = Put(vms, files).run()
        puttries = 1
        while (not putfiles.ok()) and puttries < 5:
            puttries += 1
            sleep(5)
            files = ['memtouch/memtouch-with-busyloop3.c']
            putfiles = Put(vms, files).run()
        if not putfiles.ok():
            return 'ERROR'
        Remote(
            'gcc -O2 -lm -std=gnu99 -Wall memtouch-with-busyloop3.c -o memtouch-with-busyloop3',
            vms).run()
        calibration = Remote(
            './memtouch-with-busyloop3 --cmd-calibrate ' + str(size),
            [vms[0]]).run()
        args = ''
        for p in calibration.processes():
            for line in p.stdout().split('\n'):
                if '--cpu-speed' in line:
                    args = line
        logger.debug('%s', args)
        return Remote(
            './memtouch-with-busyloop3 --cmd-makeload ' + args + ' ' +
            str(size) + ' ' + str(speed), vms)
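
The method above returns an execo Remote action that has not been run yet; the caller starts and later kills it, as in the workflow example further down. A minimal usage sketch (the `engine` object, VM addresses, size and speed below are hypothetical):

# Hypothetical experiment object exposing mem_update(); the VM descriptors
# only need the 'ip' key read above.
vms_params = [{'ip': '10.158.0.10'}, {'ip': '10.158.0.11'}]
stress = engine.mem_update(vms_params, size=1843, speed=12.5)
if stress != 'ERROR':
    stress.start()   # launch the memtouch load on every VM
    # ... run the measurements ...
    stress.kill()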
Example 2
def wait_oargrid_job_start(oargrid_job_id=None,
                           frontend_connection_params=None,
                           timeout=False):
    """Sleep until an oargrid job's start time.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.
    """
    sleep(until=get_oargrid_job_info(
        oargrid_job_id, frontend_connection_params, timeout)['start_date'])
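
A minimal usage sketch, assuming an oargrid job was already submitted (the job id below is hypothetical; get_oargrid_job_nodes is the companion execo_g5k helper for retrieving the reserved nodes):

from execo_g5k import wait_oargrid_job_start, get_oargrid_job_nodes

oargrid_job_id = 42  # hypothetical id returned by an earlier oargridsub call
wait_oargrid_job_start(oargrid_job_id)  # blocks until the job's start_date
nodes = get_oargrid_job_nodes(oargrid_job_id)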
Example 3
def wait_vms_have_started(vms, restart=True):
    """Scan port 22 on all vms, distributed on hosts"""
    # Creating file with list of VMs ip
    fd, tmpfile = tempfile.mkstemp(prefix='vmips')
    f = fdopen(fd, 'w')
    for vm in vms:
        f.write(vm['ip'] + '\n')
    f.close()
    # Getting the list of hosts
    hosts = list(set([vm['host'] for vm in vms]))
    hosts.sort()
    # Pushing file on all hosts
    TaktukPut(hosts, [tmpfile]).run()
    logger.debug(pformat(hosts))
    # Splitting nmap scan
    n_vm_scan = ceil(len(vms) / len(hosts)) + 1
    cmds = []
    for i in range(len(hosts)):
        start = str(int(i * n_vm_scan))
        end = str(int((i + 1) * n_vm_scan))
        cmds.append("awk 'NR>=" + start + " && NR<" + end +
                    "' " + tmpfile.split('/')[-1] + " > nmap_file ; "
                    + "nmap -v -oG - -i nmap_file -p 22")
    logger.debug('%s', pformat(cmds))
    nmap = TaktukRemote('{{cmds}}', hosts)
    nmap_tries = 0
    all_up = False
    started_vms = []
    old_started = started_vms[:]
    while (not all_up) and nmap_tries < 10:
        sleep(15)
        logger.detail('nmap_tries %s', nmap_tries)
        nmap.run()
        for p in nmap.processes:
            for line in p.stdout.split('\n'):
                if 'Status' in line:
                    split_line = line.split(' ')
                    ip = split_line[1]
                    state = split_line[3].strip()
                    if state == 'Up':
                        vm = [vm for vm in vms if vm['ip'] == ip]
                        if len(vm) > 0:
                            vm[0]['state'] = 'OK'

        started_vms = [vm for vm in vms if vm['state'] == 'OK']
        all_up = len(started_vms) == len(vms)
        if started_vms != old_started:
            old_started = started_vms
        else:
            if restart:
                restart_vms([vm for vm in vms if vm['state'] == 'KO'])
            nmap_tries += 1
        if nmap_tries == 1:
            activate_vms([vm for vm in vms if vm['state'] == 'KO'])
        if not all_up:
            logger.info(str(nmap_tries) + ': ' + str(len(started_vms)) + '/' +
                        str(len(vms)))
        nmap.reset()

    TaktukRemote('rm ' + tmpfile.split('/')[-1], hosts).run()
    Process('rm ' + tmpfile).run()
    if all_up:
        logger.info('All VMs have been started')
        return True
    else:
        logger.error('Not all VMs have been started')
        return False
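
The function expects VM descriptors carrying the keys it reads and writes ('ip', 'host', 'state'); a minimal sketch with hypothetical addresses:

vms = [{'ip': '10.158.0.10', 'host': 'paranoia-1.rennes.grid5000.fr', 'state': 'KO'},
       {'ip': '10.158.0.11', 'host': 'paranoia-2.rennes.grid5000.fr', 'state': 'KO'}]
if wait_vms_have_started(vms, restart=True):
    print('port 22 answered on every VM')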
Example 4
    p.shell = True
    p.nolog_exit_code = p.ignore_exit_code = True
kill_stress.run()

logger.info('Starting memtouch process')
cmd = './memtouch-with-busyloop3 --cmd-makeload ' +\
    '--cpu-speed 304408.621872 --mem-speed 63235516.087661 128 128'
stress = TaktukRemote(cmd, vms.values()).start()

all_started = False
while not all_started:
    all_started = True
    for p in stress.processes:
        if not p.started:
            all_started = False
            sleep(1)
            break

logger.info('Retrieving memtouch process id')
cmd = 'ps aux | grep "memtouch-with-busyloop3" | grep -v "grep" | awk \'{print $2}\''
get_stress = TaktukRemote(cmd, vms.values()).run()

logger.info('Limiting memtouch to 1% cpu via cpulimit')
tmp_vms = []
processes = []
vms_proc = {}
for p in get_stress.processes:
    tmp_vms.append(p.host.address)
    processes.append(p.stdout.strip())
    vms_proc[p.host.address] = p.stdout.strip()
init_limit = TaktukRemote('cpulimit -p {{processes}} -l 1', tmp_vms)
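
The `{{processes}}` pattern (like `{{cmds}}` in the nmap example above) uses execo's per-host substitution: the expression inside `{{...}}` is indexed by each host's position in the host list, so every VM gets its own pid. A small sketch with hypothetical pids and VM addresses:

from execo import TaktukRemote

pids = ['1234', '5678']                  # hypothetical memtouch pids, one per VM
vm_addresses = ['vm-1.g5k', 'vm-2.g5k']  # hypothetical VM addresses, same order
limit = TaktukRemote('cpulimit -p {{pids}} -l 1', vm_addresses)
limit.run()  # vm-1.g5k runs 'cpulimit -p 1234 -l 1', vm-2.g5k gets pid 5678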
Example 5
def wait_oar_job_start(oar_job_id=None,
                       frontend=None,
                       frontend_connection_params=None,
                       timeout=None,
                       prediction_callback=None):
    """Sleep until an oar job's start time.

    As long as the job isn't scheduled, wait_oar_job_start will sleep
    / poll every
    ``execo_g5k.config.g5k_configuration['polling_interval']`` seconds
    until it is scheduled. Then, knowing its start date, it will sleep
    the amount of time necessary to wait for the job start.

    returns True if wait was successful, False otherwise (job
    cancelled, error)

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is None (no
      timeout).

    :param prediction_callback: function taking a unix timestamp as
      parameter. This function will be called each time oar job start
      prediction changes.
    """
    def check_prediction_changed(prediction, new_prediction):
        old_prediction = prediction
        prediction = new_prediction
        if prediction != old_prediction:
            if prediction_callback != None:
                prediction_callback(prediction)
        return prediction

    prediction = None
    countdown = Timer(timeout)
    while countdown.remaining() == None or countdown.remaining() > 0:
        infos = get_oar_job_info(oar_job_id,
                                 frontend,
                                 frontend_connection_params,
                                 countdown.remaining(),
                                 nolog_exit_code=True,
                                 nolog_timeout=True,
                                 nolog_error=True)
        now = time.time()
        if 'start_date' in infos or 'scheduled_start' in infos:
            if 'start_date' in infos:
                new_prediction = infos['start_date']
            elif 'scheduled_start' in infos:
                new_prediction = infos['scheduled_start']
            prediction = check_prediction_changed(prediction, new_prediction)
        if 'state' in infos:
            if infos['state'] == "Terminated" or infos['state'] == "Error":
                return False
            if infos['state'] == "Running":
                return True
        if 'start_date' in infos or 'scheduled_start' in infos:
            if now >= new_prediction:
                sleep(
                    checked_min(g5k_configuration.get('tiny_polling_interval'),
                                countdown.remaining()))
                continue
            elif now + g5k_configuration.get(
                    'polling_interval') > new_prediction:
                sleep(until=checked_min(
                    new_prediction, now + countdown.remaining()
                    if countdown.remaining() != None else None))
                continue
        sleep(
            checked_min(g5k_configuration.get('polling_interval'),
                        countdown.remaining()))
    return False
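
A minimal usage sketch, assuming a job submitted with execo_g5k.oarsub (the resource string and site are placeholders); the callback simply prints each new start-time prediction:

import time
from execo_g5k import oarsub, OarSubmission, wait_oar_job_start

def show_prediction(ts):
    # called each time the predicted start date changes
    print('predicted start: %s' % time.ctime(ts))

[(job_id, site)] = oarsub([(OarSubmission(resources='nodes=1',
                                          walltime='1:00:00'), 'rennes')])
started = wait_oar_job_start(job_id, site, prediction_callback=show_prediction)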
Example 6
    def workflow(self, comb):
        exit_string = set_style('\nABORTING WORKFLOW\n', 'report_error')
        logger.info('%s \n%s',
                    set_style('Performing measurements for: ', 'report_error'),
                    pformat(comb))

        logger.info('%s', set_style('Defining VM parameters', 'parameter'))
        cpusets = {
            'vm-' + str(i_vm): '0'
            for i_vm in range(1 + 2 * comb['cpu_load'])
        }

        self.vms_params = define_vms_params(1 + 2 * comb['cpu_load'],
                                            self.ip_mac,
                                            vms_params=[],
                                            mem_size=comb['mem_size'],
                                            cpusets=cpusets)

        logger.info('%s', set_style('Creating VM disks', 'parameter'))
        if not create_disks(self.hosts, self.vms_params):
            logger.error('Unable to create the disks, %s', exit_string)
            return False

        destroy_all(self.hosts)

        logger.info(
            '%s',
            set_style('Performing migration with other VM on node SRC ',
                      'user2'))

        mig_vm = self.vms_params[0]

        static_vms = list(self.vms_params)
        static_vms.remove(mig_vm)
        split_vms = split_vm(static_vms)

        if not install([mig_vm], self.hosts[0]):
            logger.error('Unable to install the migrating VM, %s', exit_string)
            return False
        if len(split_vms[0]) > 0:
            if not install(split_vms[0], self.hosts[0]):
                logger.error('Unable to install the colocated VM, %s',
                             exit_string)
                return False

        logger.info(
            '%s', set_style('Launching ping probes from frontend',
                            'parameter'))
        pingprobes = self.ping_probes(self.vms_params, comb['cluster'])
        pingprobes.start()

        stress = self.mem_update([mig_vm] + split_vms[0],
                                 size=comb['mem_size'] * 0.9,
                                 speed=comb['mig_bw'] *
                                 comb['mem_update_rate'] / 100)

        if stress == 'ERROR':
            return False

        logger.info(
            '%s %s', set_style('Starting stress on', 'parameter'), ' '.join([
                set_style(param['vm_id'], 'object_repr')
                for param in split_vms[0] + [mig_vm]
            ]))
        stress.start()

        sleep(comb['mem_size'] * comb['mem_update_rate'] / 10000)

        measurements_loop(self.options.n_measure, [mig_vm],
                          self.hosts,
                          twonodes_migrations,
                          'sequential',
                          label='ONE',
                          mig_speed=comb['mig_bw'])
        stress.kill()

        #        logger.info('%s', set_style('Performing migration with other VM on BOTH nodes ', 'user2'))
        #        destroy_all( self.hosts )
        #
        #        if not install([mig_vm], self.hosts[0]):
        #            logger.error('Unable to install the migrating VM, %s', exit_string)
        #            return False
        #
        #        if len(split_vms[0]) > 0:
        #            if not install( split_vms[0], self.hosts[0]):
        #                logger.error('Unable to install the colocated VM on SRC, %s', exit_string)
        #                return False
        #            if not install( split_vms[1], self.hosts[1]):
        #                logger.error('Unable to install the colocated VM on DST, %s', exit_string)
        #                return False
        #
        #        stress = self.mem_update( self.vms_params , size = comb['mem_size'] * 0.9,
        #                                  speed = comb['mig_bw']*comb['mem_update_rate']/100 )
        #        if stress == 'ERROR':
        #            return False
        #
        #        logger.info('%s %s', set_style('Starting stress on ', 'parameter'),
        #                    ' '.join([set_style(param['vm_id'], 'object_repr') for param in self.vms_params ]))
        #        stress.start()
        #        sleep( comb['mem_size'] * comb['mem_update_rate']/ 10000 )
        #
        #        measurements_loop(self.options.n_measure, [mig_vm], self.hosts, twonodes_migrations,
        #                      'sequential', label = 'BOTH', mig_speed = comb['mig_bw'] )
        #        stress.kill()
        destroy_all(self.hosts)

        pingprobes.kill()

        return True
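
The `comb` dictionary consumed by workflow() is one combination out of a parameter sweep; a sketch of how such combinations could be produced with execo_engine.sweep (the parameter values and the `engine` object are hypothetical):

from execo_engine import sweep

# Hypothetical parameter space; the keys match what workflow() reads.
parameters = {'cluster': ['paranoia'],
              'cpu_load': [0, 1],
              'mem_size': [2048],
              'mem_update_rate': [10, 50],
              'mig_bw': [125]}
for comb in sweep(parameters):   # every combination of the listed values
    engine.workflow(comb)        # 'engine' is the experiment instance defining workflow()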