class PTLTestData(Plugin):

    """
    Nose plugin that saves post-analysis data when a test case fails,
    errors out or times out: captured logs, a ``pbs_diag`` snapshot and
    gdb output for any core files found under ``PBS_HOME``.
    """

    name = 'PTLTestData'
    score = sys.maxint - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        # per-testsuite count of saved data sets (reset on suite change)
        self.__save_data_count = 0
        # name of the testsuite last seen, to detect suite boundaries
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        """
        Set the destination directory for post analysis data and the
        per-testsuite cap on how many data sets may be saved.

        :param post_data_dir: directory to save data under
        :param max_postdata_threshold: max saves per suite (0 = no limit)
        """
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        # only useful when a destination directory was supplied
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        """
        Save post analysis data for one finished test case.

        :param test: nose test wrapper (or context) for the finished test
        :param status: result status string ('PASS', 'FAIL', 'ERROR', ...)
        :param err: optional ``(exc_type, exc_value, traceback)`` tuple
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            # new testsuite started: reset the per-suite saved-data counter
            self.__save_data_count = 0
            self.__priv_sn = sn
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warning(_msg)
            return
        # Saving home might take time so disable timeout
        # handler set by runner
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        # try/finally guarantees the log file is closed even if a remote
        # command below raises (the original leaked the fd on that path)
        try:
            f.write(testlogs + '\n')
            f.write(status_data + '\n')
            f.write('test duration: %s\n' %
                    str(getattr(test, 'duration', '0')))
            if status in ('PASS', 'SKIP'):
                # Test case passed or skipped, no need to save post
                # analysis data
                return
            if ((self.max_postdata_threshold != 0) and
                    (self.__save_data_count >=
                     self.max_postdata_threshold)):
                _msg = 'Total number of saved post analysis data for this'
                _msg += ' testsuite is exceeded max postdata threshold'
                _msg += ' (%d)' % self.max_postdata_threshold
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            svr = getattr(_test, 'server', None)
            if svr is not None:
                svr_host = svr.hostname
            else:
                _msg = 'Could not find Server Object in given test object'
                _msg += ', skipping saving post analysis data'
                f.write(_msg + '\n')
                self.logger.warning(_msg)
                return
            pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'],
                                    'unsupported', 'pbs_diag')
            cmd = [pbs_diag, '-f', '-d', '2']
            cmd += ['-u', self.du.get_current_user()]
            if len(svr.jobs) > 0:
                cmd += ['-j', ','.join(svr.jobs.keys())]
            ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to get diag information for '
                _msg += 'on %s:' % svr_host
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            # pbs_diag prints the generated tarball path on stdout
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                return
            diag_out_dest = os.path.join(datadir,
                                         os.path.basename(diag_out))
            if not self.du.is_localhost(svr_host):
                diag_out_r = svr_host + ':' + diag_out
            else:
                diag_out_r = diag_out
            ret = self.du.run_copy(current_host, diag_out_r,
                                   diag_out_dest, sudo=True,
                                   level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to copy generated diag from'
                _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            # copy succeeded; remove the tarball from the server host
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
            # collect any core files left in the PBS_HOME priv directories
            cores = []
            dir_list = ['server_priv', 'sched_priv', 'mom_priv']
            for d in dir_list:
                path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
                files = self.du.listdir(hostname=svr_host, path=path,
                                        sudo=True, level=logging.DEBUG2)
                for _f in files:
                    if os.path.basename(_f).startswith('core'):
                        cores.append(_f)
            cores = list(set(cores))
            if len(cores) > 0:
                # unpack the diag tarball, add per-core gdb output,
                # then re-pack and re-compress it
                cmd = ['gunzip', diag_out_dest]
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed unzip generated diag at %s:' % \
                        diag_out_dest
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                # strip the exact '.gz' suffix; str.rstrip('.gz') would
                # strip any trailing '.', 'g' or 'z' characters instead
                diag_out_dest = diag_out_dest[:-len('.gz')]
                cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed extract generated diag %s' % \
                        diag_out_dest
                    _msg += ' to %s:' % datadir
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                self.du.rm(hostname=current_host, path=diag_out_dest,
                           force=True, sudo=True, level=logging.DEBUG2)
                # exact '.tar' suffix strip (see '.gz' note above)
                diag_out_dest = diag_out_dest[:-len('.tar')]
                for c in cores:
                    # pbs_diag -g prints gdb information for a core file
                    cmd = [pbs_diag, '-g', c]
                    ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                          level=logging.DEBUG2)
                    if ret['rc'] != 0:
                        _msg = 'Failed to get core file information for '
                        _msg += '%s on %s:' % (c, svr_host)
                        _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                        f.write(_msg + '\n')
                        self.logger.error(_msg)
                    else:
                        of = os.path.join(diag_out_dest,
                                          os.path.basename(c) + '.out')
                        _f = open(of, 'w+')
                        _f.write('\n'.join(ret['out']) + '\n')
                        _f.close()
                        self.du.rm(hostname=svr_host, path=c, force=True,
                                   sudo=True, level=logging.DEBUG2)
                cmd = ['tar', '-cf', diag_out_dest + '.tar']
                cmd += [os.path.basename(diag_out_dest)]
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      cwd=datadir, level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed generate tarball of diag directory'
                    _msg += ' %s' % diag_out_dest
                    _msg += ' after adding core(s) information in it:'
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                cmd = ['gzip', diag_out_dest + '.tar']
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed compress tarball of diag %s' % \
                        diag_out_dest
                    _msg += '.tar after adding core(s) information in it:'
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                self.du.rm(current_host, diag_out_dest, sudo=True,
                           recursive=True, force=True,
                           level=logging.DEBUG2)
            else:
                # no cores: just rename the tarball to a host-tagged name
                # (exact '.tar.gz' suffix strip, see note above)
                diag_out_dest = diag_out_dest[:-len('.tar.gz')]
                dest = os.path.join(datadir,
                                    'PBS_' + current_host.split('.')[0] +
                                    '.tar.gz')
                ret = self.du.run_copy(current_host,
                                       diag_out_dest + '.tar.gz', dest,
                                       sudo=True, level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed rename tarball of diag from %s' % \
                        diag_out_dest
                    _msg += '.tar.gz to %s:' % dest
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                self.du.rm(current_host,
                           path=diag_out_dest + '.tar.gz',
                           force=True, sudo=True, level=logging.DEBUG2)
            self.__save_data_count += 1
            _msg = 'Successfully saved post analysis data'
            self.logger.log(logging.DEBUG2, _msg)
        finally:
            f.close()

    def addError(self, test, err):
        """Nose hook: a test raised an unexpected exception."""
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        """Nose hook: a test assertion failed."""
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        """Nose hook: a test passed."""
        self.__save_home(test, 'PASS')
class PTLTestData(Plugin):

    """
    Nose plugin that saves post-analysis data when a test case fails,
    errors out or times out: captured logs, a ``pbs_diag`` snapshot and
    gdb output for any core files found under ``PBS_HOME``.
    """

    name = 'PTLTestData'
    score = sys.maxint - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        # per-testsuite count of saved data sets (reset on suite change)
        self.__save_data_count = 0
        # name of the testsuite last seen, to detect suite boundaries
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        """
        Set the destination directory for post analysis data and the
        per-testsuite cap on how many data sets may be saved.

        :param post_data_dir: directory to save data under
        :param max_postdata_threshold: max saves per suite (0 = no limit)
        """
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        # only useful when a destination directory was supplied
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        """
        Save post analysis data for one finished test case.

        :param test: nose test wrapper (or context) for the finished test
        :param status: result status string ('PASS', 'FAIL', 'ERROR', ...)
        :param err: optional ``(exc_type, exc_value, traceback)`` tuple
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            # new testsuite started: reset the per-suite saved-data counter
            self.__save_data_count = 0
            self.__priv_sn = sn
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warning(_msg)
            return
        # Saving home might take time so disable timeout
        # handler set by runner
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        # try/finally guarantees the log file is closed even if a remote
        # command below raises (the original leaked the fd on that path)
        try:
            f.write(testlogs + '\n')
            f.write(status_data + '\n')
            f.write('test duration: %s\n' %
                    str(getattr(test, 'duration', '0')))
            if status in ('PASS', 'SKIP'):
                # Test case passed or skipped, no need to save post
                # analysis data
                return
            if ((self.max_postdata_threshold != 0) and
                    (self.__save_data_count >=
                     self.max_postdata_threshold)):
                _msg = 'Total number of saved post analysis data for this'
                _msg += ' testsuite is exceeded max postdata threshold'
                _msg += ' (%d)' % self.max_postdata_threshold
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            svr = getattr(_test, 'server', None)
            if svr is not None:
                svr_host = svr.hostname
            else:
                _msg = 'Could not find Server Object in given test object'
                _msg += ', skipping saving post analysis data'
                f.write(_msg + '\n')
                self.logger.warning(_msg)
                return
            pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'],
                                    'unsupported', 'pbs_diag')
            cur_user = self.du.get_current_user()
            cmd = [pbs_diag, '-f', '-d', '2']
            cmd += ['-u', cur_user]
            # write the diag tarball into the invoking user's home dir
            cmd += ['-o', pwd.getpwnam(cur_user).pw_dir]
            if len(svr.jobs) > 0:
                cmd += ['-j', ','.join(svr.jobs.keys())]
            ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to get diag information for '
                _msg += 'on %s:' % svr_host
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            # pbs_diag prints the generated tarball path on stdout
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                return
            diag_out_dest = os.path.join(datadir,
                                         os.path.basename(diag_out))
            if not self.du.is_localhost(svr_host):
                diag_out_r = svr_host + ':' + diag_out
            else:
                diag_out_r = diag_out
            ret = self.du.run_copy(current_host, diag_out_r,
                                   diag_out_dest, sudo=True,
                                   level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to copy generated diag from'
                _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            # copy succeeded; remove the tarball from the server host
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
            # collect any core files left in the PBS_HOME priv directories
            cores = []
            dir_list = ['server_priv', 'sched_priv', 'mom_priv']
            for d in dir_list:
                path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
                files = self.du.listdir(hostname=svr_host, path=path,
                                        sudo=True, level=logging.DEBUG2)
                for _f in files:
                    if os.path.basename(_f).startswith('core'):
                        cores.append(_f)
            cores = list(set(cores))
            if len(cores) > 0:
                # unpack the diag tarball, add per-core gdb output,
                # then re-pack and re-compress it
                cmd = ['gunzip', diag_out_dest]
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed unzip generated diag at %s:' % \
                        diag_out_dest
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                # strip the exact '.gz' suffix; str.rstrip('.gz') would
                # strip any trailing '.', 'g' or 'z' characters instead
                diag_out_dest = diag_out_dest[:-len('.gz')]
                cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed extract generated diag %s' % \
                        diag_out_dest
                    _msg += ' to %s:' % datadir
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                self.du.rm(hostname=current_host, path=diag_out_dest,
                           force=True, sudo=True, level=logging.DEBUG2)
                # exact '.tar' suffix strip (see '.gz' note above)
                diag_out_dest = diag_out_dest[:-len('.tar')]
                for c in cores:
                    # pbs_diag -g prints gdb information for a core file
                    cmd = [pbs_diag, '-g', c]
                    ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                          level=logging.DEBUG2)
                    if ret['rc'] != 0:
                        _msg = 'Failed to get core file information for '
                        _msg += '%s on %s:' % (c, svr_host)
                        _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                        f.write(_msg + '\n')
                        self.logger.error(_msg)
                    else:
                        of = os.path.join(diag_out_dest,
                                          os.path.basename(c) + '.out')
                        _f = open(of, 'w+')
                        _f.write('\n'.join(ret['out']) + '\n')
                        _f.close()
                        self.du.rm(hostname=svr_host, path=c, force=True,
                                   sudo=True, level=logging.DEBUG2)
                cmd = ['tar', '-cf', diag_out_dest + '.tar']
                cmd += [os.path.basename(diag_out_dest)]
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      cwd=datadir, level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed generate tarball of diag directory'
                    _msg += ' %s' % diag_out_dest
                    _msg += ' after adding core(s) information in it:'
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                cmd = ['gzip', diag_out_dest + '.tar']
                ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed compress tarball of diag %s' % \
                        diag_out_dest
                    _msg += '.tar after adding core(s) information in it:'
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                self.du.rm(current_host, diag_out_dest, sudo=True,
                           recursive=True, force=True,
                           level=logging.DEBUG2)
            else:
                # no cores: just rename the tarball to a host-tagged name
                # (exact '.tar.gz' suffix strip, see note above)
                diag_out_dest = diag_out_dest[:-len('.tar.gz')]
                dest = os.path.join(datadir,
                                    'PBS_' + current_host.split('.')[0] +
                                    '.tar.gz')
                ret = self.du.run_copy(current_host,
                                       diag_out_dest + '.tar.gz', dest,
                                       sudo=True, level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed rename tarball of diag from %s' % \
                        diag_out_dest
                    _msg += '.tar.gz to %s:' % dest
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                    return
                self.du.rm(current_host,
                           path=diag_out_dest + '.tar.gz',
                           force=True, sudo=True, level=logging.DEBUG2)
            self.__save_data_count += 1
            _msg = 'Successfully saved post analysis data'
            self.logger.log(logging.DEBUG2, _msg)
        finally:
            f.close()

    def addError(self, test, err):
        """Nose hook: a test raised an unexpected exception."""
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        """Nose hook: a test assertion failed."""
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        """Nose hook: a test passed."""
        self.__save_home(test, 'PASS')
class Job(ResourceResv):

    """
    PBS Job. Attributes and Resources

    :param username: Job username
    :type username: str or None
    :param attrs: Job attributes
    :type attrs: Dictionary
    :param jobname: Name of the PBS job
    :type jobname: str or None
    """

    dflt_attributes = {
        ATTR_N: 'STDIN',
        ATTR_j: 'n',
        ATTR_m: 'a',
        ATTR_p: '0',
        ATTR_r: 'y',
        ATTR_k: 'oe',
    }
    runtime = 100
    du = DshUtils()

    def __init__(self, username=TEST_USER, attrs=None, jobname=None):
        # 'attrs' defaults to None rather than {}: this method mutates
        # 'attrs' on Cray platforms, and a shared mutable default would
        # leak attributes across Job instances.
        if attrs is None:
            attrs = {}
        self.platform = self.du.get_platform()
        self.server = {}
        self.script = None
        self.script_body = None
        if username is not None:
            self.username = str(username)
        else:
            self.username = None
        # instance-level du starts unset; lazily created where needed
        self.du = None
        self.interactive_handle = None
        if self.platform == 'cray' or self.platform == 'craysim':
            if 'Resource_List.select' in attrs:
                select = attrs['Resource_List.select']
                attrs['Resource_List.select'] = self.add_cray_vntype(select)
            elif 'Resource_List.vntype' not in attrs:
                attrs['Resource_List.vntype'] = 'cray_compute'

        PBSObject.__init__(self, None, attrs, self.dflt_attributes)

        if jobname is not None:
            self.custom_attrs[ATTR_N] = jobname
            self.attributes[ATTR_N] = jobname
        self.set_variable_list(self.username)
        self.set_sleep_time(100)

    def __del__(self):
        del self.__dict__

    def add_cray_vntype(self, select=None):
        """
        Cray specific function to add vntype as ``cray_compute`` to each
        select chunk

        :param select: PBS select statement
        :type select: str or None
        """
        ra = []
        for chunk in select.split('+'):
            parsed = PbsTypeSelect(chunk)
            novntype = 'vntype' not in parsed.resources
            nohost = 'host' not in parsed.resources
            novnode = 'vnode' not in parsed.resources
            # only tag chunks that are not already pinned to a node/host
            # and have no explicit vntype
            if novntype and nohost and novnode:
                chunk = chunk + ":vntype=cray_compute"
            ra.append(chunk)
        return '+'.join(ra)

    def set_attributes(self, a=None):
        """
        set attributes and custom attributes on this job.
        custom attributes are used when converting attributes to CLI.
        In case of Cray platform if 'Resource_List.vntype' is set
        already then remove it and add vntype value to each chunk of a
        select statement.

        :param a: Attribute dictionary
        :type a: Dictionary or List or None
        """
        # None (not {}) as default: 'a' may be mutated on Cray below,
        # and a shared mutable default would leak state between calls.
        if a is None:
            a = {}
        if isinstance(a, list):
            a = OrderedDict(a)
        self.attributes = OrderedDict(
            list(self.dflt_attributes.items()) +
            list(self.attributes.items()) +
            list(a.items()))
        if self.platform == 'cray' or self.platform == 'craysim':
            s = 'Resource_List.select' in a
            v = 'Resource_List.vntype' in self.custom_attrs
            if s and v:
                # an explicit select wins over a previously-set vntype
                del self.custom_attrs['Resource_List.vntype']
                select = a['Resource_List.select']
                a['Resource_List.select'] = self.add_cray_vntype(select)
        self.custom_attrs = OrderedDict(
            list(self.custom_attrs.items()) +
            list(a.items()))

    def set_variable_list(self, user=None, workdir=None):
        """
        Customize the ``Variable_List`` job attribute to ``<user>``
        """
        if user is None:
            userinfo = pwd.getpwuid(os.getuid())
            user = userinfo[0]
            homedir = userinfo[5]
        else:
            try:
                homedir = pwd.getpwnam(user)[5]
            except Exception:
                # best-effort: unknown user gets an empty home dir
                homedir = ""

        self.username = user

        s = ['PBS_O_HOME=' + homedir]
        s += ['PBS_O_LANG=en_US.UTF-8']
        s += ['PBS_O_LOGNAME=' + user]
        s += ['PBS_O_PATH=/usr/bin:/bin:/usr/bin:/usr/local/bin']
        s += ['PBS_O_MAIL=/var/spool/mail/' + user]
        s += ['PBS_O_SHELL=/bin/bash']
        s += ['PBS_O_SYSTEM=Linux']
        if workdir is not None:
            wd = workdir
        else:
            wd = os.getcwd()
        s += ['PBS_O_WORKDIR=' + str(wd)]
        self.attributes[ATTR_v] = ",".join(s)
        self.set_attributes()

    def set_sleep_time(self, duration):
        """
        Set the sleep duration for this job.

        :param duration: The duration, in seconds, to sleep
        :type duration: int
        """
        self.set_execargs('/bin/sleep', duration)

    def set_execargs(self, executable, arguments=None):
        """
        Set the executable and arguments to use for this job

        :param executable: path to an executable. No checks are made.
        :type executable: str
        :param arguments: arguments to executable.
        :type arguments: str or list or int
        """
        msg = ['job: executable set to ' + str(executable)]
        if arguments is not None:
            msg += [' with arguments: ' + str(arguments)]

        self.logger.info("".join(msg))
        self.attributes[ATTR_executable] = executable
        if arguments is not None:
            args = ''
            xml_beginargs = '<jsdl-hpcpa:Argument>'
            xml_endargs = '</jsdl-hpcpa:Argument>'
            if isinstance(arguments, list):
                for a in arguments:
                    args += xml_beginargs + str(a) + xml_endargs
            elif isinstance(arguments, str):
                args = xml_beginargs + arguments + xml_endargs
            elif isinstance(arguments, int):
                args = xml_beginargs + str(arguments) + xml_endargs
            self.attributes[ATTR_Arglist] = args
        else:
            self.unset_attributes([ATTR_Arglist])
        self.set_attributes()

    def create_script(self, body=None, asuser=None, hostname=None):
        """
        Create a job script from a given body of text into a temporary
        location

        :param body: the body of the script
        :type body: str or None
        :param asuser: Optionally the user to own this script, defaults
                       ot current user
        :type asuser: str or None
        :param hostname: The host on which the job script is to be created
        :type hostname: str or None
        """
        if body is None:
            return None

        if isinstance(body, list):
            body = '\n'.join(body)

        if self.platform == 'cray' or self.platform == 'craysim':
            # rewrite any '#PBS ... select=' directive lines so each
            # select chunk carries a Cray vntype
            body = body.split("\n")
            for i, line in enumerate(body):
                if line.startswith("#PBS") and "select=" in line:
                    if 'Resource_List.vntype' in self.attributes:
                        self.unset_attributes(['Resource_List.vntype'])
                    line_arr = line.split(" ")
                    for j, element in enumerate(line_arr):
                        select = element.startswith("select=")
                        lselect = element.startswith("-lselect=")
                        if select or lselect:
                            if lselect:
                                sel_str = element[9:]
                            else:
                                sel_str = element[7:]
                            sel_str = self.add_cray_vntype(select=sel_str)
                            if lselect:
                                line_arr[j] = "-lselect=" + sel_str
                            else:
                                line_arr[j] = "select=" + sel_str
                    body[i] = " ".join(line_arr)
            body = '\n'.join(body)

        # If the user has a userhost, the job will run from there
        # so the script should be made there
        if self.username:
            user = PbsUser.get_user(self.username)
            if user.host:
                hostname = user.host
                asuser = user.name

        self.script_body = body
        if self.du is None:
            self.du = DshUtils()
        # First create the temporary file as current user and only change
        # its mode once the current user has written to it
        fn = self.du.create_temp_file(hostname, prefix='PtlPbsJobScript',
                                      asuser=asuser, body=body)
        self.du.chmod(hostname, fn, mode=0o755)
        self.script = fn
        return fn

    def create_subjob_id(self, job_array_id, subjob_index):
        """
        insert subjob index into the square brackets of job array id

        :param job_array_id: PBS parent array job id
        :type job_array_id: str
        :param subjob_index: index of subjob
        :type subjob_index: int
        :returns: subjob id string
        """
        idx = job_array_id.find('[]')
        return job_array_id[:idx + 1] + str(subjob_index) + \
            job_array_id[idx + 1:]

    def create_eatcpu_job(self, duration=None, hostname=None):
        """
        Create a job that eats cpu indefinitely or for the given
        duration of time

        :param duration: The duration, in seconds, to sleep
        :type duration: int
        :param hostname: hostname on which to execute the job
        :type hostname: str or None
        """
        if self.du is None:
            self.du = DshUtils()
        shebang_line = '#!' + self.du.which(hostname, exe='python3')
        body = """
import signal
import sys

x = 0


def receive_alarm(signum, stack):
    sys.exit()


signal.signal(signal.SIGALRM, receive_alarm)

if (len(sys.argv) > 1):
    input_time = sys.argv[1]
    print('Terminating after %s seconds' % input_time)
    signal.alarm(int(input_time))
else:
    print('Running indefinitely')

while True:
    x += 1
"""
        script_body = shebang_line + body
        script_path = self.du.create_temp_file(hostname=hostname,
                                               body=script_body,
                                               suffix='.py')
        if not self.du.is_localhost(hostname):
            # remote execution host: ship the script to the user's home
            d = pwd.getpwnam(self.username).pw_dir
            ret = self.du.run_copy(hosts=hostname, src=script_path,
                                   dest=d)
            if ret is None or ret['rc'] != 0:
                raise AssertionError("Failed to copy file %s to %s"
                                     % (script_path, hostname))
        pbs_conf = self.du.parse_pbs_config(hostname)
        shell_path = os.path.join(pbs_conf['PBS_EXEC'],
                                  'bin', 'pbs_python')
        a = {ATTR_S: shell_path}
        self.set_attributes(a)
        mode = 0o755
        if not self.du.chmod(hostname=hostname, path=script_path,
                             mode=mode, sudo=True):
            raise AssertionError("Failed to set permissions for file %s"
                                 " to %s" % (script_path, oct(mode)))
        self.set_execargs(script_path, duration)