def _findCollision(self, tName, nameDict, varDict, hashKeys, keyFmt, nameFmt=identity):
    dupesDict = {}
    for (key, name) in nameDict.items():
        dupesDict.setdefault(nameFmt(name), []).append(keyFmt(name, key))
    ask = True
    for name, key_list in sorted(dupesDict.items()):
        if len(key_list) > 1:
            self._log.warn('Multiple %s keys are mapped to the name %s!', tName, repr(name))
            for key in sorted(key_list):
                self._log.warn('\t%s hash %s using:', tName, str.join('#', key))
                for var, value in self._getFilteredVarDict(varDict, key, hashKeys).items():
                    self._log.warn('\t\t%s = %s', var, value)
            if ask and not utils.getUserBool('Do you want to continue?', False):
                sys.exit(os.EX_OK)
            ask = False
def write(self, stream, entries = None, printMinimal = False, printState = False,
        printUnused = True, printSource = False, printDefault = True, printTight = False):
    if not entries:
        entries = self.iterContent()
    config = {}
    for entry in entries:
        if printUnused or entry.accessed:
            if printDefault or not entry.source.startswith('<default'):
                if printState or not entry.option.startswith('#'):
                    config.setdefault(entry.section, {}).setdefault(entry.option, []).append(entry)
    for section in sorted(config):
        if not printTight:
            stream.write('[%s]\n' % section)
        for option in sorted(config[section]):
            entryList = sorted(config[section][option], key = lambda e: e.order)
            if printMinimal:
                entryList = ConfigEntry.simplifyEntries(entryList)
            for entry in entryList:
                if printTight:
                    stream.write('[%s] ' % section)
                for idx, line in enumerate(entry.format().splitlines()):
                    if printSource and (idx == 0) and entry.source:
                        if len(line) < 33:
                            stream.write('%-35s; %s\n' % (line, entry.source))
                        else:
                            stream.write('; source: %s\n%s\n' % (entry.source, line))
                    else:
                        stream.write(line + '\n')
        if not printTight:
            stream.write('\n')
def __init__(self, jobDB, task, jobs = None, configString = ''):
    Report.__init__(self, jobDB, task, jobs, configString)
    catJobs = {}
    catDescDict = {}
    # Assignment of jobs to categories (depending on variables and using datasetnick if available)
    jobConfig = {}
    varList = []
    for jobNum in self._jobs:
        if task:
            jobConfig = task.getJobConfig(jobNum)
            varList = sorted(ifilter(lambda var: '!' not in repr(var), jobConfig.keys()))
            if 'DATASETSPLIT' in varList:
                varList.remove('DATASETSPLIT')
                varList.append('DATASETNICK')
        catKey = str.join('|', imap(lambda var: '%s=%s' % (var, jobConfig[var]), varList))
        catJobs.setdefault(catKey, []).append(jobNum)
        if catKey not in catDescDict:
            catDescDict[catKey] = dict(imap(lambda var: (var, jobConfig[var]), varList))
    # Kill redundant keys from description
    commonVars = dict(imap(lambda var: (var, jobConfig[var]), varList))  # seed with last varList
    for catKey in catDescDict:
        for key in list(commonVars.keys()):
            if key not in catDescDict[catKey].keys():
                commonVars.pop(key)
            elif commonVars[key] != catDescDict[catKey][key]:
                commonVars.pop(key)
    for catKey in catDescDict:
        for commonKey in commonVars:
            catDescDict[catKey].pop(commonKey)
    # Generate job-category map with efficient int keys - catNum becomes the new catKey
    self._job2cat = {}
    self._catDescDict = {}
    for catNum, catKey in enumerate(sorted(catJobs)):
        self._catDescDict[catNum] = catDescDict[catKey]
        self._job2cat.update(dict.fromkeys(catJobs[catKey], catNum))
def _buildBlocks(self, protoBlocks, hashNameDictDS, hashNameDictB):
    # Return named dataset
    for hashDS in sorted(protoBlocks):
        for hashB in sorted(protoBlocks[hashDS]):
            blockSEList = None
            for seList in ifilter(lambda s: s is not None,
                    imap(lambda x: x[3], protoBlocks[hashDS][hashB])):
                blockSEList = blockSEList or []
                blockSEList.extend(seList)
            if blockSEList is not None:
                blockSEList = list(UniqueList(blockSEList))
            metaKeys = protoBlocks[hashDS][hashB][0][1].keys()

            def fnProps(path, metadata, events, seList, objStore):
                if events is None:
                    events = -1
                return {DataProvider.URL: path, DataProvider.NEntries: events,
                    DataProvider.Metadata: lmap(metadata.get, metaKeys)}
            yield {
                DataProvider.Dataset: hashNameDictDS[hashDS],
                DataProvider.BlockName: hashNameDictB[hashB][1],
                DataProvider.Locations: blockSEList,
                DataProvider.Metadata: list(metaKeys),
                DataProvider.FileList: lsmap(fnProps, protoBlocks[hashDS][hashB])
            }
def __init__(self, config, job_db, task):
    map_cat2jobs = {}
    map_cat2desc = {}
    job_config_dict = {}
    vn_list = []
    for jobnum in job_db.get_job_list():
        if task:
            job_config_dict = task.get_job_dict(jobnum)
            vn_list = lfilter(self._is_not_ignored_vn, sorted(job_config_dict.keys()))
        cat_key = str.join('|', imap(lambda vn: '%s=%s' % (vn, job_config_dict[vn]), vn_list))
        map_cat2jobs.setdefault(cat_key, []).append(jobnum)
        if cat_key not in map_cat2desc:
            map_cat2desc[cat_key] = dict(imap(lambda var: (var, job_config_dict[var]), vn_list))
    # Kill redundant keys from description - seed with last vn_list
    common_var_dict = dict(imap(lambda var: (var, job_config_dict[var]), vn_list))
    for cat_key in map_cat2desc:
        for key in list(common_var_dict.keys()):
            if key not in map_cat2desc[cat_key].keys():
                common_var_dict.pop(key)
            elif common_var_dict[key] != map_cat2desc[cat_key][key]:
                common_var_dict.pop(key)
    for cat_key in map_cat2desc:
        for common_key in common_var_dict:
            map_cat2desc[cat_key].pop(common_key)
    # Generate job-category map with efficient int keys - catNum becomes the new cat_key
    self._job2cat = {}
    self._map_cat2desc = {}
    for cat_num, cat_key in enumerate(sorted(map_cat2jobs)):
        self._map_cat2desc[cat_num] = map_cat2desc[cat_key]
        self._job2cat.update(dict.fromkeys(map_cat2jobs[cat_key], cat_num))
def get_plugin_list(pname, inherit_prefix=False):
    alias_dict = {}
    inherit_map = {}
    cls = Plugin.get_class(pname)
    for entry in cls.get_class_info_list():
        depth = entry.pop('depth', 0)
        (alias, name) = entry.popitem()
        alias_dict.setdefault(name, []).append((depth, alias))

    def _process_child_map(mapping, prefix=''):
        for cls_name in mapping:
            inherit_map[cls_name] = _process_child_map(mapping[cls_name], prefix + '-' + cls_name)
        return prefix
    _process_child_map(cls.get_class_children(), pname)
    alias_dict.pop(pname, None)

    table_list = []
    for name in alias_dict:
        # sorted by length of name and depth
        by_len_depth = sorted(alias_dict[name], key=lambda d_a: (-len(d_a[1]), d_a[0]))
        # sorted by depth and name
        by_depth_name = sorted(alias_dict[name], key=lambda d_a: (d_a[0], d_a[1]))
        new_name = by_len_depth.pop()[1]
        depth = min(imap(lambda d_a: d_a[0], alias_dict[name]))
        alias_list = lmap(lambda d_a: d_a[1], by_depth_name)
        alias_list.remove(new_name)
        if inherit_prefix:
            new_name = ' | ' * (inherit_map[name].count('-') - 1) + new_name
        entry = {'Name': new_name, 'Alias': str.join(', ', alias_list),
            'Depth': '%02d' % depth, 'Inherit': inherit_map.get(name, '')}
        table_list.append(entry)
    return table_list
def _match_entries(self, container, option_list=None):
    key_list = container.get_options()
    if option_list is not None:
        key_list = lfilter(key_list.__contains__, option_list)

    def _get_entry_key_ordered(entry):
        return (tuple(imap(_remove_none, _get_section_key_filtered(entry))), entry.order)

    def _get_section_key_filtered(entry):
        return self._get_section_key(entry.section.replace('!', '').strip())

    def _remove_none(key):
        if key is None:
            return -1
        return key

    def _select_sections(entry):
        return _get_section_key_filtered(entry) is not None

    result = []
    for key in key_list:
        (entries, entries_reverse) = ([], [])
        for entry in container.iter_config_entries(key, _select_sections):
            if entry.section.endswith('!'):
                entries_reverse.append(entry)
            else:
                entries.append(entry)
        result.extend(sorted(entries_reverse, key=_get_entry_key_ordered, reverse=True))
        result.extend(sorted(entries, key=_get_entry_key_ordered))
    return result
def jobs(self, *args, **kw):
    result = '<body>'
    result += str(CPProgressBar(0, min(100, self.counter), 100, 300))
    if 'job' in kw:
        jobNum = int(kw['job'])
        info = self.task.getJobConfig(jobNum)
        result += str(TabularHTML(zip(sorted(info), sorted(info)), [info], top = False))

    def getJobObjs():
        for jobNum in self.jobMgr.jobDB.getJobs():
            result = self.jobMgr.jobDB.get(jobNum).__dict__
            result['jobNum'] = jobNum
            result.update(result['dict'])
            yield result
    fmtTime = lambda t: time.strftime('%Y-%m-%d %T', time.localtime(t))
    result += str(TabularHTML([
        ('jobNum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
        ('wmsId', 'WMS ID'), ('dest', 'Destination'), ('submitted', 'Submitted')
    ], getJobObjs(), fmt = {
        'jobNum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
        'state': lambda s: Job.enum2str(s),
        'submitted': fmtTime
    }, top = True))
    result += '</body>'
    return result
def __init__(self, jobDB, task, jobs = None, configString = ''):
    Report.__init__(self, jobDB, task, jobs, configString)
    catJobs = {}
    catDescDict = {}
    # Assignment of jobs to categories (depending on variables and using datasetnick if available)
    for jobNum in self._jobs:
        jobConfig = task.getJobConfig(jobNum)
        varList = sorted(filter(lambda var: '!' not in repr(var), jobConfig.keys()))
        if 'DATASETSPLIT' in varList:
            varList.remove('DATASETSPLIT')
            varList.append('DATASETNICK')
        catKey = str.join('|', map(lambda var: '%s=%s' % (var, jobConfig[var]), varList))
        catJobs.setdefault(catKey, []).append(jobNum)
        if catKey not in catDescDict:
            catDescDict[catKey] = dict(map(lambda var: (var, jobConfig[var]), varList))
    # Kill redundant keys from description
    commonVars = dict(map(lambda var: (var, jobConfig[var]), varList))  # seed with last varList
    for catKey in catDescDict:
        for key in commonVars.keys():
            if key not in catDescDict[catKey].keys():
                commonVars.pop(key)
            elif commonVars[key] != catDescDict[catKey][key]:
                commonVars.pop(key)
    for catKey in catDescDict:
        for commonKey in commonVars:
            catDescDict[catKey].pop(commonKey)
    # Generate job-category map with efficient int keys - catNum becomes the new catKey
    self._job2cat = {}
    self._catDescDict = {}
    for catNum, catKey in enumerate(sorted(catJobs)):
        self._catDescDict[catNum] = catDescDict[catKey]
        self._job2cat.update(dict.fromkeys(catJobs[catKey], catNum))
def _get_sandbox_file_list(self, task, sm_list):
    # Prepare all input files
    dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
    dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
        lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
    task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
        *imap(lambda x: x.get_task_dict(), [task] + sm_list))
    task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
        'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
    task_config_str_list = DictFormat(escape_strings=True).format(
        task_config_dict, format='export %s%s%s\n')
    vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
        self._remote_event_handler.get_mon_env_dict().keys()))
    vn_alias_dict.update(task.get_var_alias_map())
    vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

    # Resolve wildcards in task input files
    def _get_task_fn_list():
        for fpi in task.get_sb_in_fpi_list():
            matched = glob.glob(fpi.path_abs)
            if matched != []:
                for match in matched:
                    yield match
            else:
                yield fpi.path_abs
    return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(),
        [VirtualFile('_config.sh', sorted(task_config_str_list)),
        VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])
def DiffLists(oldList, newList, keyFun, changedFkt, isSorted = False):
    (listAdded, listMissing, listChanged) = ([], [], [])
    if not isSorted:
        (newList, oldList) = (sorted(newList, key = keyFun), sorted(oldList, key = keyFun))
    (newIter, oldIter) = (iter(newList), iter(oldList))
    (new, old) = (next(newIter, None), next(oldIter, None))
    while True:
        if (new is None) or (old is None):
            break
        keyNew = keyFun(new)
        keyOld = keyFun(old)
        if keyNew < keyOld:  # new[npos] < old[opos]
            listAdded.append(new)
            new = next(newIter, None)
        elif keyNew > keyOld:  # new[npos] > old[opos]
            listMissing.append(old)
            old = next(oldIter, None)
        else:  # new[npos] == old[opos] according to *active* comparison
            changedFkt(listAdded, listMissing, listChanged, old, new)
            (new, old) = (next(newIter, None), next(oldIter, None))
    while new is not None:
        listAdded.append(new)
        new = next(newIter, None)
    while old is not None:
        listMissing.append(old)
        old = next(oldIter, None)
    return (listAdded, listMissing, listChanged)
def _get_workflow_graph(workflow):
    (graph, node_list) = _get_graph(workflow)

    # Process nodes
    node_str_list = []
    map_node2name = {}
    map_node2color = {}
    for node in sorted(node_list, key=lambda x: x.__class__.__name__):
        node_props = {
            'label': '"%s"' % _get_node_label(node),
            'fillcolor': '"%s"' % _get_node_color(node, map_node2color),
            'style': '"filled"',
        }
        if node == workflow:
            node_props['root'] = 'True'
        node_prop_str = str.join('; ', imap(lambda key: '%s = %s' % (key, node_props[key]), node_props))
        node_str_list.append('%s [%s];\n' % (_get_node_name(node, map_node2name), node_prop_str))

    # Process edges
    edge_str_list = []
    for entry in sorted(graph, key=lambda x: x.__class__.__name__):
        for child in sorted(set(graph[entry]), key=lambda x: x.__class__.__name__):
            edge_str_list.append('%s -> %s;\n' % (_get_node_name(entry, map_node2name),
                _get_node_name(child, map_node2name)))

    cluster_str_list = []
    dot_header = 'digraph mygraph {\nmargin=0;\nedge [len=2];\noverlap=compress;splines=True;\n'
    dot_format_string_list = [dot_header] + node_str_list + cluster_str_list + edge_str_list + ['}\n']
    return str.join('', dot_format_string_list)
def getCMSFiles(self, blockPath):
    lumiDict = {}
    if self.selectedLumis:  # Central lumi query
        lumiDict = self.getCMSLumisImpl(blockPath)
        lumiDict = QM(lumiDict, lumiDict, {})
    for (fileInfo, listLumi) in self.getCMSFilesImpl(blockPath, self.onlyValid, self.selectedLumis):
        if self.selectedLumis:
            if not listLumi:
                listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])

            def acceptLumi():
                for (run, lumiList) in listLumi:
                    for lumi in lumiList:
                        if selectLumi((run, lumi), self.selectedLumis):
                            return True
            if not acceptLumi():
                continue
            if self.includeLumi:
                (listLumiExt_Run, listLumiExt_Lumi) = ([], [])
                for (run, lumi_list) in sorted(listLumi):
                    for lumi in lumi_list:
                        listLumiExt_Run.append(run)
                        listLumiExt_Lumi.append(lumi)
                fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
            else:
                fileInfo[DataProvider.Metadata] = [list(sorted(set(map(lambda (run, lumi_list): run, listLumi))))]
        yield fileInfo
def DiffLists(oldList, newList, keyFun, changedFkt, isSorted=False):
    (listAdded, listMissing, listChanged) = ([], [], [])
    if not isSorted:
        (newList, oldList) = (sorted(newList, key=keyFun), sorted(oldList, key=keyFun))
    (newIter, oldIter) = (iter(newList), iter(oldList))
    (new, old) = (next(newIter, None), next(oldIter, None))
    while True:
        if (new is None) or (old is None):
            break
        keyNew = keyFun(new)
        keyOld = keyFun(old)
        if keyNew < keyOld:  # new[npos] < old[opos]
            listAdded.append(new)
            new = next(newIter, None)
        elif keyNew > keyOld:  # new[npos] > old[opos]
            listMissing.append(old)
            old = next(oldIter, None)
        else:  # new[npos] == old[opos] according to *active* comparison
            changedFkt(listAdded, listMissing, listChanged, old, new)
            (new, old) = (next(newIter, None), next(oldIter, None))
    while new is not None:
        listAdded.append(new)
        new = next(newIter, None)
    while old is not None:
        listMissing.append(old)
        old = next(oldIter, None)
    return (listAdded, listMissing, listChanged)
def _match_entries(self, container, option_list=None):
    key_list = container.get_options()
    if option_list is not None:
        key_list = lfilter(key_list.__contains__, option_list)

    def _get_entry_key_ordered(entry):
        return (tuple(imap(_remove_none, _get_section_key_filtered(entry))), entry.order)

    def _get_section_key_filtered(entry):
        return self._get_section_key(entry.section.replace('!', '').strip())

    def _remove_none(key):
        if key is None:
            return -1
        return key

    def _select_sections(entry):
        return _get_section_key_filtered(entry) is not None

    result = []
    for key in key_list:
        (entries, entries_reverse) = ([], [])
        for entry in container.iter_config_entries(key, _select_sections):
            if entry.section.endswith('!'):
                entries_reverse.append(entry)
            else:
                entries.append(entry)
        result.extend(sorted(entries_reverse, key=_get_entry_key_ordered, reverse=True))
        result.extend(sorted(entries, key=_get_entry_key_ordered))
    return result
def DiffLists(oldList, newList, cmpFkt, changedFkt, isSorted = False):
    (listAdded, listMissing, listChanged) = ([], [], [])
    if not isSorted:
        (newList, oldList) = (sorted(newList, cmpFkt), sorted(oldList, cmpFkt))
    (newIter, oldIter) = (iter(newList), iter(oldList))
    (new, old) = (next(newIter, None), next(oldIter, None))
    while True:
        if (new == None) or (old == None):
            break
        result = cmpFkt(new, old)
        if result < 0:  # new[npos] < old[opos]
            listAdded.append(new)
            new = next(newIter, None)
        elif result > 0:  # new[npos] > old[opos]
            listMissing.append(old)
            old = next(oldIter, None)
        else:  # new[npos] == old[opos] according to *active* comparison
            changedFkt(listAdded, listMissing, listChanged, old, new)
            (new, old) = (next(newIter, None), next(oldIter, None))
    while new != None:
        listAdded.append(new)
        new = next(newIter, None)
    while old != None:
        listMissing.append(old)
        old = next(oldIter, None)
    return (listAdded, listMissing, listChanged)
def get_list_difference(list_old, list_new, key_fun, on_matching_fun,
        is_sorted=False, key_fun_sort=None):
    (list_added, list_missing, list_matching) = ([], [], [])
    if not is_sorted:
        list_new = sorted(list_new, key=key_fun_sort or key_fun)
        list_old = sorted(list_old, key=key_fun_sort or key_fun)
    (iter_new, iter_old) = (iter(list_new), iter(list_old))
    (new, old) = (next(iter_new, None), next(iter_old, None))
    while True:
        if (new is None) or (old is None):
            break
        key_new = key_fun(new)
        key_old = key_fun(old)
        if key_new < key_old:  # new[npos] < old[opos]
            list_added.append(new)
            new = next(iter_new, None)
        elif key_new > key_old:  # new[npos] > old[opos]
            list_missing.append(old)
            old = next(iter_old, None)
        else:  # new[npos] == old[opos] according to *active* comparison
            on_matching_fun(list_added, list_missing, list_matching, old, new)
            (new, old) = (next(iter_new, None), next(iter_old, None))
    while new is not None:
        list_added.append(new)
        new = next(iter_new, None)
    while old is not None:
        list_missing.append(old)
        old = next(iter_old, None)
    return (list_added, list_missing, list_matching)
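# Hypothetical usage sketch for get_list_difference above. The record lists and
# the _on_match callback are made up for illustration; the callback just records
# entries whose payload changed between the old and new list.
def _on_match(list_added, list_missing, list_matching, old, new):
    if old['size'] != new['size']:
        list_matching.append((old, new))

old_files = [{'name': 'a.txt', 'size': 1}, {'name': 'b.txt', 'size': 2}]
new_files = [{'name': 'b.txt', 'size': 3}, {'name': 'c.txt', 'size': 4}]
(added, missing, changed) = get_list_difference(old_files, new_files,
    key_fun=lambda entry: entry['name'], on_matching_fun=_on_match)
print(added)    # [{'name': 'c.txt', 'size': 4}]
print(missing)  # [{'name': 'a.txt', 'size': 1}]
print(changed)  # [({'name': 'b.txt', 'size': 2}, {'name': 'b.txt', 'size': 3})]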
def _matchEntries(self, container, option_list=None):
    key_list = container.getKeys()
    if option_list is not None:
        key_list = lfilter(lambda key: key in key_list, option_list)

    result = []
    getFilteredSectionKey = lambda entry: self._getSectionKey(entry.section.replace('!', '').strip())

    def removeNone(key):
        if key is None:
            return -1
        return key
    getOrderedEntryKey = lambda entry: (tuple(imap(removeNone, getFilteredSectionKey(entry))), entry.order)
    for key in key_list:
        (entries, entries_reverse) = ([], [])
        for entry in container.getEntries(key, lambda x: getFilteredSectionKey(x) is not None):
            if entry.section.endswith('!'):
                entries_reverse.append(entry)
            else:
                entries.append(entry)
        result.extend(sorted(entries_reverse, key=getOrderedEntryKey, reverse=True))
        result.extend(sorted(entries, key=getOrderedEntryKey))
    return result
def get_workflow_graph(workflow):
    graph = getGraph(workflow)
    classCluster = {}
    for entry in graph:
        classCluster.setdefault(getNodeParent(entry.__class__), []).append(entry)
    clusters = ''
    globalNodes = []
    colors = {}
    for (cluster_id, classClusterEntries) in enumerate(classCluster.values()):
        if len(classClusterEntries) == 1:
            globalNodes.append(classClusterEntries[0])
        clusters += 'subgraph cluster_%d {' % cluster_id
        for node in classClusterEntries:
            clusters += '%s [label="%s", fillcolor="%s", style="filled"];\n' % (
                getNodeName(node), getNodeLabel(node), getNodeColor(node, colors))
        clusters += '}\n'
    edgeStr = ''
    for entry in sorted(graph, key=lambda x: x.__class__.__name__):
        for child in sorted(set(graph[entry]), key=lambda x: x.__class__.__name__):
            edgeStr += '%s -> %s;\n' % (getNodeName(entry), getNodeName(child))
    header = 'digraph mygraph {\nmargin=0;\noverlap=scale;splines=True;\n'
    footer = '}\n'
    return header + clusters + edgeStr + footer
def jobs(self, *args, **kw):
    result = '<body>'
    result += str(CPProgressBar(0, min(100, self.counter), 100, 300))
    if 'job' in kw:
        jobNum = int(kw['job'])
        info = self.task.getJobConfig(jobNum)
        result += str(TabularHTML(lzip(sorted(info), sorted(info)), [info], top=False))

    def getJobObjs():
        for jobNum in self.jobMgr.jobDB.getJobs():
            result = self.jobMgr.jobDB.get(jobNum).__dict__
            result['jobNum'] = jobNum
            result.update(result['dict'])
            yield result
    fmtTime = lambda t: time.strftime('%Y-%m-%d %T', time.localtime(t))
    result += str(TabularHTML(
        [('jobNum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
            ('wmsId', 'WMS ID'), ('dest', 'Destination'), ('submitted', 'Submitted')],
        getJobObjs(),
        fmt={
            'jobNum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
            'state': Job.enum2str,
            'submitted': fmtTime
        }, top=True))
    result += '</body>'
    return result
def resolve_paths(path, search_path_list=None, must_exist=True, exception_type=PathError):
    path = clean_path(path)  # replace $VAR, ~user, \ separators
    result = []
    if os.path.isabs(path):
        result.extend(sorted(glob.glob(path)))  # Resolve wildcards for existing files
        if not result:
            if must_exist:
                raise exception_type('Could not find file "%s"' % path)
            return [path]  # Return non-existing, absolute path
    else:  # search relative path in search directories
        search_path_list = search_path_list or []
        for spath in UniqueList(search_path_list):
            result.extend(sorted(glob.glob(clean_path(os.path.join(spath, path)))))
        if not result:
            if must_exist:
                raise exception_type('Could not find file "%s" in \n\t%s' % (
                    path, str.join('\n\t', search_path_list)))
            return [path]  # Return non-existing, relative path
    return result
def jobs(self, *args, **kw):
    element_list = [CPProgressBar(0, min(100, self._counter), 100, 300)]
    if 'job' in kw:
        jobnum = int(kw['job'])
        info = self._workflow.task.get_job_dict(jobnum)
        element_list.append(CPTable(lzip(sorted(info), sorted(info)), [info], pivot=False))

    def _fmt_time(value):
        return time.strftime('%Y-%m-%d %T', time.localtime(value))

    def _iter_job_objs():
        for jobnum in self._workflow.job_manager.job_db.get_job_list():
            result = self._workflow.job_manager.job_db.get_job_transient(jobnum).__dict__
            result['jobnum'] = jobnum
            result.update(result['dict'])
            yield result
    header_list = [('jobnum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
        ('gc_id', 'WMS ID'), ('SITE', 'Site'), ('QUEUE', 'Queue'), ('submitted', 'Submitted')]
    fmt_dict = {
        'jobnum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
        'state': Job.enum2str,
        'submitted': _fmt_time
    }
    element_list.append(CPTable(header_list, _iter_job_objs(), fmt_dict=fmt_dict, pivot=True))
    return _get_html_page(element_list)
def list_parameters(psrc, opts):
    (psp_list, need_gc_param) = get_parameters(opts, psrc)
    enabled_vn_list = opts.output.split(',')
    meta_list = lfilter(lambda k: (k in enabled_vn_list) or not opts.output, psrc.get_job_metadata())
    tracked_vn_list = lmap(lambda k: k.value, ifilter(lambda k: not k.untracked, meta_list))
    untracked_vn_list = lmap(lambda k: k.value, ifilter(lambda k: k.untracked, meta_list))
    if opts.collapse > 0:
        (header_list, psp_list) = collapse_psp_list(psp_list, tracked_vn_list, opts)
    else:
        header_list = [('GC_JOB_ID', '#')]
        if need_gc_param:
            header_list.append(('GC_PARAM', 'GC_PARAM'))
    if opts.active:
        header_list.append((ParameterInfo.ACTIVE, 'ACTIVE'))
    if opts.visible:
        tracked_vn_list = opts.visible.split(',')
    header_list.extend(sorted(izip(tracked_vn_list, tracked_vn_list)))
    if opts.untracked:
        header_list.extend(sorted(imap(lambda n: (n, '(%s)' % n),
            ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked_vn_list))))
    ConsoleTable.create(header_list, psp_list)
def display_metadata(dataset_list, block, metadata_key_list, metadata_list, base_header_list=None):
    header_list = [(DataProvider.BlockName, 'Block')] + (base_header_list or []) + \
        lzip(sorted(metadata_key_list), sorted(metadata_key_list))
    for metadata in metadata_list:
        metadata[DataProvider.Dataset] = block[DataProvider.Dataset]
        metadata[DataProvider.BlockName] = block.get(DataProvider.BlockName)
    title = get_title_update_header(dataset_list, header_list)
    ConsoleTable.create(header_list, metadata_list, title=title, pivot=True)
def list_parameters(opts, psource):
    (result, needGCParam) = get_parameters(opts, psource)
    enabledOutput = opts.output.split(',')
    output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
    stored = lfilter(lambda k: k.untracked == False, output)
    untracked = lfilter(lambda k: k.untracked == True, output)

    if opts.collapse > 0:
        result_old = result
        result = {}
        result_nicks = {}
        head = [('COLLATE_JOBS', '# of jobs')]
        if 'DATASETSPLIT' in stored:
            stored.remove('DATASETSPLIT')
            if opts.collapse == 1:
                stored.append('DATASETNICK')
                head.append(('DATASETNICK', 'DATASETNICK'))
            elif opts.collapse == 2:
                head.append(('COLLATE_NICK', '# of nicks'))
        for pset in result_old:
            if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
                pset.pop('DATASETSPLIT')
            nickname = None
            if ('DATASETNICK' in pset) and (opts.collapse == 2):
                nickname = pset.pop('DATASETNICK')
            h = md5_hex(repr(lmap(lambda key: pset.get(str(key)), stored)))
            result.setdefault(h, []).append(pset)
            result_nicks.setdefault(h, set()).add(nickname)

        def doCollate(h):
            tmp = result[h][0]
            tmp['COLLATE_JOBS'] = len(result[h])
            tmp['COLLATE_NICK'] = len(result_nicks[h])
            return tmp
        result = lmap(doCollate, result)
    else:
        head = [('GC_JOB_ID', '#')]
        if needGCParam:
            head.append(('GC_PARAM', 'GC_PARAM'))
    if opts.active:
        head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
    if opts.visible:
        stored = opts.visible.split(',')
    head.extend(sorted(izip(stored, stored)))
    if opts.untracked:
        head.extend(sorted(imap(lambda n: (n, '(%s)' % n),
            ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
    utils.vprint('')
    utils.printTabular(head, result)
def getDisplayList(aliasDict):
    tableList = []
    for name in aliasDict:
        # sorted by length of name and depth
        by_len_depth = sorted(aliasDict[name], key = lambda d_a: (-len(d_a[1]), d_a[0]))
        # sorted by depth and name
        by_depth_name = sorted(aliasDict[name], key = lambda d_a: (d_a[0], d_a[1]))
        new_name = by_len_depth.pop()[1]
        aliasList = lmap(lambda d_a: d_a[1], by_depth_name)
        aliasList.remove(new_name)
        entry = {'Name': new_name, 'Alias': str.join(', ', aliasList)}
        if ('Multi' not in name) and ('Base' not in name):
            tableList.append(entry)
    return tableList
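# Hypothetical usage sketch for getDisplayList above. lmap is stubbed here (in
# grid-control it comes from python_compat); the alias data is made up. The
# shortest alias becomes the display name, the remaining aliases are listed,
# and names containing 'Multi' or 'Base' are dropped from the table.
lmap = lambda fun, iterable: list(map(fun, iterable))

aliasDict = {
    'TaskReport': [(1, 'task'), (2, 'TaskReport'), (2, 'report')],
    'MultiReport': [(1, 'multi')],
}
for row in getDisplayList(aliasDict):
    print('%s -> %s' % (row['Name'], row['Alias']))  # task -> TaskReport, report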
def _findCollision(self, tName, nameDict, varDict, hashKeys, keyFmt, nameFmt = identity):
    dupesDict = {}
    for (key, name) in nameDict.items():
        dupesDict.setdefault(nameFmt(name), []).append(keyFmt(name, key))
    ask = True
    for name, key_list in sorted(dupesDict.items()):
        if len(key_list) > 1:
            self._log.warn('Multiple %s keys are mapped to the name %s!', tName, repr(name))
            for key in sorted(key_list):
                self._log.warn('\t%s hash %s using:', tName, str.join('#', key))
                for var, value in self._getFilteredVarDict(varDict, key, hashKeys).items():
                    self._log.warn('\t\t%s = %s', var, value)
            if ask and not utils.getUserBool('Do you want to continue?', False):
                sys.exit(os.EX_OK)
            ask = False
def list_parameters(opts, psource):
    (result, needGCParam) = get_parameters(opts, psource)
    enabledOutput = opts.output.split(',')
    output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
    stored = lfilter(lambda k: k.untracked == False, output)
    untracked = lfilter(lambda k: k.untracked == True, output)

    if opts.collapse > 0:
        result_old = result
        result = {}
        result_nicks = {}
        head = [('COLLATE_JOBS', '# of jobs')]
        if 'DATASETSPLIT' in stored:
            stored.remove('DATASETSPLIT')
            if opts.collapse == 1:
                stored.append('DATASETNICK')
                head.append(('DATASETNICK', 'DATASETNICK'))
            elif opts.collapse == 2:
                head.append(('COLLATE_NICK', '# of nicks'))
        for pset in result_old:
            if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
                pset.pop('DATASETSPLIT')
            nickname = None
            if ('DATASETNICK' in pset) and (opts.collapse == 2):
                nickname = pset.pop('DATASETNICK')
            h = md5_hex(repr(lmap(pset.get, stored)))
            result.setdefault(h, []).append(pset)
            result_nicks.setdefault(h, set()).add(nickname)

        def doCollate(h):
            tmp = result[h][0]
            tmp['COLLATE_JOBS'] = len(result[h])
            tmp['COLLATE_NICK'] = len(result_nicks[h])
            return tmp
        result = lmap(doCollate, result)
    else:
        head = [('GC_JOB_ID', '#')]
        if needGCParam:
            head.append(('GC_PARAM', 'GC_PARAM'))
    if opts.active:
        head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
    if opts.visible:
        stored = opts.visible.split(',')
    head.extend(sorted(izip(stored, stored)))
    if opts.untracked:
        head.extend(sorted(imap(lambda n: (n, '(%s)' % n),
            ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
    utils.vprint('')
    utils.printTabular(head, result)
def _str_dict_cfg(value, parser=identity, strfun=str):
    (srcdict, srckeys) = value
    result = ''
    if srcdict.get(None) is not None:
        result = strfun(srcdict[None])
    key_value_iter = imap(lambda k: '%s => %s' % (k, strfun(srcdict[k])), sorted(srckeys))
    return (result + str.join(' <newline> ', key_value_iter)).strip()
def dump_log_setup(level):
    root = logging.getLogger()

    def display_logger(indent, logger, name):
        propagate_symbol = '+'
        if hasattr(logger, 'propagate') and not logger.propagate:
            propagate_symbol = 'o'
        desc = name
        if hasattr(logger, 'level'):
            desc += ' (level = %s)' % logging.getLevelName(logger.level)
        root.log(level, '%s%s %s', '| ' * indent, propagate_symbol, desc)
        if hasattr(logger, 'filters'):
            for lf in logger.filters:
                root.log(level, '%s# %s', '| ' * (indent + 1), lf.__class__.__name__)
        if hasattr(logger, 'handlers'):
            for handler in logger.handlers:
                root.log(level, '%s> %s', '| ' * (indent + 1), handler.__class__.__name__)
                fmt = handler.formatter
                if fmt:
                    desc = fmt.__class__.__name__
                    if isinstance(fmt, ExceptionFormatter):
                        desc = repr(fmt)
                    elif isinstance(fmt, logging.Formatter):
                        desc += '(%s, %s)' % (repr(getattr(fmt, '_fmt')), repr(fmt.datefmt))
                    root.log(level, '%s| %% %s', '| ' * (indent + 1), desc)

    display_logger(0, root, '<root>')
    for key, logger in sorted(root.manager.loggerDict.items()):
        display_logger(key.count('.') + 1, logger, key)
def _readJobs(self, jobLimit):
    utils.ensureDirExists(self._dbPath, 'job database directory', JobError)
    candidates = []
    for jobFile in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobNum = int(jobFile.split(".")[0].split("_")[1])
        except Exception:
            continue
        candidates.append((jobNum, jobFile))

    (jobMap, maxJobs) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    idx = 0
    for (jobNum, jobFile) in sorted(candidates):
        idx += 1
        if (jobLimit >= 0) and (jobNum >= jobLimit):
            self._log.info('Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
                jobNum, len(candidates), jobLimit)
            break
        jobObj = self._load_job(os.path.join(self._dbPath, jobFile))
        jobMap[jobNum] = jobObj
        if idx % 100 == 0:
            activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
    activity.finish()
    return jobMap
def _cancel(self, task, wms, jobnum_list, interactive, show_jobs):
    if len(jobnum_list) == 0:
        return
    if show_jobs:
        self._abort_report.show_report(self.job_db, jobnum_list)
    if interactive and not self._uii.prompt_bool('Do you really want to cancel these jobs?', True):
        return

    def _mark_cancelled(jobnum):
        job_obj = self.job_db.get_job(jobnum)
        if job_obj is not None:
            self._update(task, job_obj, jobnum, Job.CANCELLED)
            self._local_event_handler.on_job_update(task, wms, job_obj, jobnum, {'reason': 'cancelled'})

    jobnum_list.reverse()
    map_gc_id2jobnum = self._get_map_gc_id_jobnum(jobnum_list)
    gc_id_list = sorted(map_gc_id2jobnum, key=lambda gc_id: -map_gc_id2jobnum[gc_id])
    for (gc_id,) in wms.cancel_jobs(gc_id_list):
        # Remove cancelled job from todo list and mark as cancelled
        _mark_cancelled(map_gc_id2jobnum.pop(gc_id))
    if map_gc_id2jobnum:
        jobnum_list = list(map_gc_id2jobnum.values())
        self._log.warning('There was a problem with cancelling the following jobs:')
        self._abort_report.show_report(self.job_db, jobnum_list)
        if (not interactive) or self._uii.prompt_bool('Do you want to mark them as cancelled?', True):
            lmap(_mark_cancelled, jobnum_list)
    if interactive:
        wait(2)
def ds_generator():
    for ds in sorted(dsCache):
        if self._sortBlock:
            sort_inplace(dsCache[ds], key=itemgetter(DataProvider.BlockName))
        for block in dsCache[ds]:
            yield block
def get_dataset_info(opts, args, query_blocks=True):
    config = get_dataset_config(opts, args)
    provider = config.get_composited_plugin('dataset', cls=DataProvider,
        bind_kwargs={'provider_name_default': config.get('dataset provider')},
        default_compositor=':ThreadedMultiDatasetProvider:')  # -T disables multi-threading further below
    dataset_list = sorted(provider.get_dataset_name_list())
    if len(dataset_list) == 0:
        raise DatasetError('No datasets matched!')

    # Query blocks only if needed
    query_blocks = False
    for option in opts.__dict__:
        if option.startswith('list_') and (option != 'list_dataset_names') or (option == 'save'):
            if getattr(opts, option):
                query_blocks = True

    block_list = None
    if query_blocks:
        block_list = provider.get_block_list_cached(show_stats=False)
        if len(block_list) == 0:
            raise DatasetError('No blocks matched!')
        if opts.ordered:
            sort_inplace(block_list, key=itemgetter(DataProvider.Dataset, DataProvider.BlockName))
            for block in block_list:
                sort_inplace(block[DataProvider.FileList], key=itemgetter(DataProvider.URL))
    return (provider, dataset_list, block_list)
def _fill_cms_fi_list(self, block, block_path):
    activity_fi = Activity('Getting file information')
    lumi_used = False
    lumi_info_dict = {}
    if self._lumi_query:  # central lumi query
        lumi_info_dict = self._get_cms_lumi_dict(block_path)
    fi_list = []
    for (fi, lumi_info_list) in self._iter_cms_files(block_path, self._only_valid, self._lumi_query):
        self._raise_on_abort()
        if lumi_info_dict and not lumi_info_list:
            lumi_info_list = lumi_info_dict.get(fi[DataProvider.URL], [])
        if lumi_info_list:
            (run_list_result, lumi_list_result) = ([], [])
            for (run, lumi_list) in sorted(lumi_info_list):
                run_list_result.extend([run] * len(lumi_list))
                lumi_list_result.extend(lumi_list)
            assert len(run_list_result) == len(lumi_list_result)
            fi[DataProvider.Metadata] = [run_list_result, lumi_list_result]
            lumi_used = True
        fi_list.append(fi)
    if lumi_used:
        block.setdefault(DataProvider.Metadata, []).extend(['Runs', 'Lumi'])
    block[DataProvider.FileList] = fi_list
    activity_fi.finish()
def display(keys, varDict, varPrefix = ''):
    maxlen = max(map(len, varDict.keys()) + [0])
    for var in sorted(keys):
        value = safeRepr(varDict[var])
        if 'password' in var:
            value = '<redacted>'
        log.critical('\t\t%s%s = %s', varPrefix, var.ljust(maxlen), value)
def _process_config_file_list(self, config, config_file_list, fragment_path, auto_prepare, must_prepare):
    # process list of uninitialized config files
    iter_uninitialized_config_files = self._config_find_uninitialized(
        config, config_file_list, auto_prepare, must_prepare)
    for (cfg, cfg_new, do_prepare) in iter_uninitialized_config_files:
        ask_user_msg = 'Do you want to prepare %s for running over the dataset?' % cfg
        if do_prepare and (auto_prepare or self._uii.prompt_bool(ask_user_msg, True)):
            self._config_store_backup(cfg, cfg_new, fragment_path)
        else:
            self._config_store_backup(cfg, cfg_new)

    result = []
    for cfg in config_file_list:
        cfg_new = config.get_work_path(os.path.basename(cfg))
        if not os.path.exists(cfg_new):
            raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
        is_instrumented = self._config_is_instrumented(cfg_new)
        if must_prepare and not is_instrumented:
            raise ConfigError('Config file %r must use %s to work properly!' % (
                cfg, str.join(', ', imap(lambda x: '@%s@' % x, sorted(self._needed_vn_set)))))
        if auto_prepare and not is_instrumented:
            self._log.warning('Config file %r was not instrumented!', cfg)
        result.append(cfg_new)
    return result
def _getSubmissionJobs(self, maxsample):
    # Get list of submittable jobs
    readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
    retryOK = readyList
    defaultJob = Job()
    if self._job_retries >= 0:
        retryOK = lfilter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self._job_retries, readyList)
    modOK = lfilter(self._task.canSubmit, readyList)
    jobList = set.intersection(set(retryOK), set(modOK))

    if self._showBlocker and readyList and not jobList:  # No submission but ready jobs
        err = []
        err += utils.QM((len(retryOK) > 0) and (len(modOK) == 0), [], ['have hit their maximum number of retries'])
        err += utils.QM((len(retryOK) == 0) and (len(modOK) > 0), [], ['are vetoed by the task module'])
        self._log_user_time.warning('All remaining jobs %s!', str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err))
    self._showBlocker = not (len(readyList) > 0 and len(jobList) == 0)

    # Determine number of jobs to submit
    submit = len(jobList)
    if self._njobs_inqueue > 0:
        submit = min(submit, self._njobs_inqueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
    if self._njobs_inflight > 0:
        submit = min(submit, self._njobs_inflight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
    if self._continuous and (maxsample > 0):
        submit = min(submit, maxsample)
    submit = max(submit, 0)
    if self._do_shuffle:
        return self._sample(jobList, submit)
    return sorted(jobList)[:submit]
def dump_log_setup(level):
    root = logging.getLogger()

    def display_logger(indent, logger, name):
        propagate_symbol = '+'
        if hasattr(logger, 'propagate') and not logger.propagate:
            propagate_symbol = 'o'
        desc = name
        if hasattr(logger, 'level'):
            desc += ' (level = %s)' % logging.getLevelName(logger.level)
        root.log(level, '%s%s %s', '| ' * indent, propagate_symbol, desc)
        if hasattr(logger, 'filters'):
            for lf in logger.filters:
                root.log(level, '%s# %s', '| ' * (indent + 1), lf.__class__.__name__)
        if hasattr(logger, 'handlers'):
            for handler in logger.handlers:
                root.log(level, '%s> %s', '| ' * (indent + 1), handler.__class__.__name__)
                fmt = handler.formatter
                if fmt:
                    desc = fmt.__class__.__name__
                    if isinstance(fmt, ExceptionFormatter):
                        desc = repr(fmt)
                    elif isinstance(fmt, logging.Formatter):
                        desc += '(%s, %s)' % (repr(getattr(fmt, '_fmt')), repr(fmt.datefmt))
                    root.log(level, '%s| %% %s', '| ' * (indent + 1), desc)

    display_logger(0, root, '<root>')
    for key, logger in sorted(root.manager.loggerDict.items()):
        display_logger(key.count('.') + 1, logger, key)
def _read_jobs(self, job_limit):
    ensure_dir_exists(self._path_db, 'job database directory', JobError)
    candidates = []
    for job_fn in fnmatch.filter(os.listdir(self._path_db), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobnum = int(job_fn.split(".")[0].split("_")[1])
        except Exception:
            clear_current_exception()
            continue
        candidates.append((jobnum, job_fn))

    (job_map, max_job_len) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    idx = 0
    for (jobnum, job_fn) in sorted(candidates):
        idx += 1
        if jobnum >= job_limit >= 0:
            self._log.info('Stopped reading job infos at job #%d out of %d available job files, ' +
                'since the limit of %d jobs is reached', jobnum, len(candidates), job_limit)
            break
        try:
            job_fn_full = os.path.join(self._path_db, job_fn)
            data = self._fmt.parse(SafeFile(job_fn_full).iter_close())
            job_obj = self._create_job_obj(job_fn_full, data)
        except Exception:
            raise JobError('Unable to process job file %r' % job_fn_full)
        job_map[jobnum] = job_obj
        activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
    activity.finish()
    return job_map
def _displaySetup(self, dsPath, head):
    if os.path.exists(dsPath):
        nickNames = set()
        for block in DataProvider.loadFromFile(dsPath).getBlocks():
            nickNames.add(block[DataProvider.Nickname])
        utils.vprint('Mapping between nickname and other settings:\n', -1)
        report = []
        for nick in sorted(nickNames):
            lumi_filter_str = formatLumi(self._nmLumi.lookup(nick, '', is_selector=False))
            if len(lumi_filter_str) > 4:
                nice_lumi_filter = '%s ... %s (%d entries)' % (
                    lumi_filter_str[0], lumi_filter_str[-1], len(lumi_filter_str))
            else:
                nice_lumi_filter = str.join(', ', lumi_filter_str)
            config_files = self._nmCfg.lookup(nick, '', is_selector=False)
            tmp = {0: nick, 1: str.join(', ', imap(os.path.basename, config_files)), 2: nice_lumi_filter}
            lookupvars = {'DATASETNICK': nick}
            for src in self._pm.lookupSources:
                src.fillParameterInfo(None, lookupvars)
            tmp.update(lookupvars)
            report.append(tmp)
        utils.printTabular(head, report, 'cl')
        utils.vprint(level=-1)
def remove_all_overlap(data):
    dist2 = lambda a, b: (a['x'] - b['x'])**2 + (a['y'] - b['y'])**2
    check_overlap = lambda a, b: dist2(a, b) < (a['weight'] + b['weight'])**2

    def remove_overlap(fix, a):
        vec = {'x': a['x'] + fix['x'], 'y': a['y'] + fix['y']}
        norm = math.sqrt(dist2(vec, {'x': 0, 'y': 0})) * 1000
        vec = {'x': vec['x'] / norm, 'y': vec['y'] / norm}
        for pt in result:
            while check_overlap(pt, a):
                a['x'] = a['x'] + vec['x'] * (random.random() - 0.25)
                a['y'] = a['y'] + vec['y'] * (random.random() - 0.25)
        return a

    def center_of_mass(data):
        wsum_x = sum(map(lambda pt: pt['x']*pt['weight'], data))
        wsum_y = sum(map(lambda pt: pt['y']*pt['weight'], data))
        sum_w = sum(map(lambda pt: pt['weight'], data))
        return {'x': wsum_x / sum_w, 'y': wsum_y / sum_w}

    result = []
    data = sorted(data, key = lambda x: -x['weight'])
    for pt in data:
        collisions = filter(lambda x: check_overlap(x, pt), result)
        if collisions:
            result.append(remove_overlap(center_of_mass(collisions), pt))
        else:
            result.append(pt)
    return result
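# Hypothetical usage sketch for remove_all_overlap above. The weighted points
# are made up; math and random are the only imports the function itself needs.
# Note: remove_all_overlap treats the result of filter() as a list, so this
# sketch assumes Python 2 (under Python 3 the filter() call would need list()).
import math
import random

points = [
    {'x': 0.0, 'y': 0.0, 'weight': 0.5},
    {'x': 0.4, 'y': 0.3, 'weight': 0.4},
    {'x': 5.0, 'y': 5.0, 'weight': 0.3},
]
# Points are processed heaviest first; lighter points that overlap (distance
# smaller than the sum of the two weights) are nudged outward until they fit.
for pt in remove_all_overlap(points):
    print('x=%.2f y=%.2f weight=%.2f' % (pt['x'], pt['y'], pt['weight']))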
def __init__(self, config, name):
    self._name = name  # needed for changeView calls before the constructor
    head = [('DATASETNICK', 'Nickname')]

    # Mapping between nickname and config files:
    self._nmCfg = config.getLookup('nickname config', {}, defaultMatcher = 'regex',
        parser = lambda x: lmap(str.strip, x.split(',')), strfun = lambda x: str.join(',', x))
    if not self._nmCfg.empty():
        allConfigFiles = sorted(set(ichain(self._nmCfg.get_values())))
        config.set('config file', str.join('\n', allConfigFiles))
        head.append((1, 'Config file'))
    elif config.get('config file', ''):
        raise ConfigError("Please use 'nickname config' instead of 'config file'")

    # Mapping between nickname and constants - only display - work is handled by the 'normal' parameter factory
    nmCName = config.getList('nickname constants', [], onChange = None)
    param_config = config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, addSections = ['parameters'])
    param_config.set('constants', str.join(' ', nmCName), '+=')
    for cName in nmCName:
        param_config.set(cName + ' matcher', 'regex')
        param_config.set(cName + ' lookup', 'DATASETNICK')
        head.append((cName, cName))

    # Mapping between nickname and lumi filter - only display - work is handled by the 'normal' lumi filter
    config.set('lumi filter matcher', 'regex')
    if 'nickname lumi filter' in config.getOptions():
        config.set('lumi filter', strDictLong(config.getDict('nickname lumi filter', {}, onChange = None)))
    self._nmLumi = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = None)
    if not self._nmLumi.empty():
        head.append((2, 'Lumi filter'))

    CMSSW.__init__(self, config, name)
    self._displaySetup(config.getWorkPath('datacache.dat'), head)
def process(self, blockIter):
    if self._sortDS:
        dsCache = {}
        for block in blockIter:
            dsCache.setdefault(block[DataProvider.Dataset], []).append(block)

        def ds_generator():
            for ds in sorted(dsCache):
                if self._sortBlock:
                    sort_inplace(dsCache[ds], key=itemgetter(DataProvider.BlockName))
                for block in dsCache[ds]:
                    yield block
        blockIter = ds_generator()
    elif self._sortBlock:
        blockIter = sorted(blockIter, key=itemgetter(DataProvider.BlockName))
    # Yield blocks
    for block in blockIter:
        if self._sortFiles:
            sort_inplace(block[DataProvider.FileList], key=itemgetter(DataProvider.URL))
        if self._sortLocation:
            sort_inplace(block[DataProvider.Locations])
        yield block
def _check_map_name2key(self, map_key2name, map_key2metadata_dict):
    # Find name <-> key collisions
    map_type2name2key_list = {}
    for (key, name) in map_key2name.items():
        if len(key) == 1:
            key_type = 'dataset'
        else:
            key_type = 'block'
        map_type2name2key_list.setdefault(key_type, {}).setdefault(name, []).append(key)
    collision = False
    map_key_type2vn_list = {
        'dataset': self._hash_input_set_dataset,
        'block': self._hash_input_set_dataset + self._hash_input_set_block
    }
    for (key_type, vn_list) in map_key_type2vn_list.items():
        for (name, key_list) in map_type2name2key_list.get(key_type, {}).items():
            if len(key_list) > 1:
                self._log.warn('Multiple %s keys are mapped to the name %s!', key_type, repr(name))
                for idx, key in enumerate(sorted(key_list)):
                    self._log.warn('\tCandidate #%d with key %r:', idx + 1, str.join('#', key))
                    metadata_dict = map_key2metadata_dict[key]
                    for (vn, value) in filter_dict(metadata_dict, key_filter=vn_list.__contains__).items():
                        self._log.warn('\t\t%s = %s', vn, value)
                collision = True
    if self._interactive_assignment and collision:
        if not UserInputInterface().prompt_bool('Do you want to continue?', False):
            sys.exit(os.EX_OK)
def _cancel(self, task, wms, jobnum_list, interactive, show_jobs):
    if len(jobnum_list) == 0:
        return
    if show_jobs:
        self._abort_report.show_report(self.job_db, jobnum_list)
    if interactive and not self._uii.prompt_bool('Do you really want to cancel these jobs?', True):
        return

    def _mark_cancelled(jobnum):
        job_obj = self.job_db.get_job(jobnum)
        if job_obj is not None:
            self._update(task, job_obj, jobnum, Job.CANCELLED)
            self._local_event_handler.on_job_update(task, wms, job_obj, jobnum, {'reason': 'cancelled'})

    jobnum_list.reverse()
    map_gc_id2jobnum = self._get_map_gc_id_jobnum(jobnum_list)
    gc_id_list = sorted(map_gc_id2jobnum, key=lambda gc_id: -map_gc_id2jobnum[gc_id])
    for (gc_id, ) in wms.cancel_jobs(gc_id_list):
        # Remove cancelled job from todo list and mark as cancelled
        _mark_cancelled(map_gc_id2jobnum.pop(gc_id))
    if map_gc_id2jobnum:
        jobnum_list = list(map_gc_id2jobnum.values())
        self._log.warning('There was a problem with cancelling the following jobs:')
        self._abort_report.show_report(self.job_db, jobnum_list)
        if (not interactive) or self._uii.prompt_bool('Do you want to mark them as cancelled?', True):
            lmap(_mark_cancelled, jobnum_list)
    if interactive:
        wait(2)
def _build_blocks(self, map_key2fm_list, map_key2name, map_key2metadata_dict):
    # Return named dataset
    for key in sorted(map_key2fm_list):
        result = {
            DataProvider.Dataset: map_key2name[key[:1]],
            DataProvider.BlockName: map_key2name[key[:2]],
        }
        fm_list = map_key2fm_list[key]

        # Determine location_list
        location_list = None
        for file_location_list in ifilter(lambda s: s is not None, imap(itemgetter(3), fm_list)):
            location_list = location_list or []
            location_list.extend(file_location_list)
        if location_list is not None:
            result[DataProvider.Locations] = list(UniqueList(location_list))

        # use first file [0] to get the initial metadata_dict [1]
        metadata_name_list = list(fm_list[0][1].keys())
        result[DataProvider.Metadata] = metadata_name_list

        # translate file metadata into data provider file info entries
        def _translate_fm2fi(url, metadata_dict, entries, location_list, obj_dict):
            if entries is None:
                entries = -1
            return {DataProvider.URL: url, DataProvider.NEntries: entries,
                DataProvider.Metadata: lmap(metadata_dict.get, metadata_name_list)}
        result[DataProvider.FileList] = lsmap(_translate_fm2fi, fm_list)
        yield result
def _submit_get_jobs(self, task):
    # Get list of submittable jobs
    jobnum_list_ready = self.job_db.get_job_list(ClassSelector(JobClass.SUBMIT_CANDIDATES))
    (n_mod_ok, n_retry_ok, jobnum_list) = self._get_enabled_jobs(task, jobnum_list_ready)

    if self._show_blocker and jobnum_list_ready and not jobnum_list:  # No submission but ready jobs
        err_str_list = []
        if (n_retry_ok <= 0) or (n_mod_ok != 0):
            err_str_list.append('have hit their maximum number of retries')
        if (n_retry_ok != 0) and (n_mod_ok <= 0):
            err_str_list.append('are vetoed by the task module')
        err_delim = ' and '
        if n_retry_ok or n_mod_ok:
            err_delim = ' or '
        self._log.log_time(logging.WARNING, 'All remaining jobs %s!', str.join(err_delim, err_str_list))
    self._show_blocker = not (len(jobnum_list_ready) > 0 and len(jobnum_list) == 0)

    # Determine number of jobs to submit
    submit = len(jobnum_list)
    if self._njobs_inqueue > 0:
        submit = min(submit, self._njobs_inqueue - self.job_db.get_job_len(ClassSelector(JobClass.ATWMS)))
    if self._njobs_inflight > 0:
        submit = min(submit, self._njobs_inflight - self.job_db.get_job_len(ClassSelector(JobClass.PROCESSING)))
    if self._chunks_enabled and (self._chunks_submit > 0):
        submit = min(submit, self._chunks_submit)
    submit = max(submit, 0)
    if self._do_shuffle:
        return self._sample(jobnum_list, submit)
    return sorted(jobnum_list)[:submit]
def display(self):
    (catStateDict, catDescDict, _) = CategoryBaseReport._getCategoryStateSummary(self)
    infos = []
    head = set()
    stateCat = {Job.SUCCESS: 'SUCCESS', Job.FAILED: 'FAILED', Job.RUNNING: 'RUNNING', Job.DONE: 'RUNNING'}
    for catKey in catDescDict:
        tmp = dict(catDescDict[catKey])
        head.update(tmp.keys())
        for stateKey in catStateDict[catKey]:
            state = stateCat.get(stateKey, 'WAITING')
            tmp[state] = tmp.get(state, 0) + catStateDict[catKey][stateKey]
        infos.append(tmp)
    stateCatList = ['WAITING', 'RUNNING', 'FAILED', 'SUCCESS']
    utils.printTabular(lmap(lambda x: (x, x), sorted(head) + stateCatList), infos,
        'c' * len(head), fmt=dict.fromkeys(stateCatList, lambda x: '%7d' % parseStr(x, int, 0)))
def write(self, stream, print_minimal=False, print_source=False, print_oneline=False, **kwarg):
    config = self._prepare_write(**kwarg)
    for section in sorted(config):
        if not print_oneline:
            stream.write('[%s]\n' % section)
        for option in sorted(config[section]):
            entry_list = sorted(config[section][option], key=lambda e: e.order)
            if print_minimal:
                entry_list = ConfigEntry.simplify_entries(entry_list)
            for entry in entry_list:
                source = ''
                if print_source:
                    source = entry.source
                stream.write(entry.format(print_section=print_oneline, source=source) + '\n')
        if not print_oneline:
            stream.write('\n')
def fillCMSFiles(self, block, blockPath):
    lumi_used = False
    lumiDict = {}
    if self._lumi_query:  # central lumi query
        lumiDict = self.getCMSLumisImpl(blockPath)
    fileList = []
    for (fileInfo, listLumi) in self.getCMSFilesImpl(blockPath, self.onlyValid, self._lumi_query):
        if lumiDict and not listLumi:
            listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])
        if listLumi:
            (listLumiExt_Run, listLumiExt_Lumi) = ([], [])
            for (run, lumi_list) in sorted(listLumi):
                listLumiExt_Run.extend([run] * len(lumi_list))
                listLumiExt_Lumi.extend(lumi_list)
            fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
            lumi_used = True
        fileList.append(fileInfo)
    if lumi_used:
        block.setdefault(DataProvider.Metadata, []).extend(['Runs', 'Lumi'])
    block[DataProvider.FileList] = fileList
def _getUserSource(self, pExpr):
    # Wrap psource factory functions
    def createWrapper(clsName):
        def wrapper(*args):
            parameterClass = ParameterSource.getClass(clsName)
            try:
                return parameterClass.create(self._paramConfig, self._repository, *args)
            except Exception:
                raise ParameterError('Error while creating %r with arguments %r' % (parameterClass.__name__, args))
        return wrapper

    userFun = {}
    for clsInfo in ParameterSource.getClassList():
        for clsName in ifilter(lambda name: name != 'depth', clsInfo.keys()):
            userFun[clsName] = createWrapper(clsName)
    try:
        return eval(pExpr, dict(userFun))  # pylint:disable=eval-used
    except Exception:
        self._log.warning('Available functions: %s', sorted(userFun.keys()))
        raise
def _read_block(self, ds_config, dataset_expr, dataset_nick):
    metadata_name_list = parse_json(ds_config.get('metadata', '[]', on_change=None))
    common_metadata = parse_json(ds_config.get('metadata common', '[]', on_change=None))
    if len(common_metadata) > len(metadata_name_list):
        raise DatasetError('Unable to set %d common metadata items ' % len(common_metadata) +
            'with %d metadata keys' % len(metadata_name_list))
    common_prefix = ds_config.get('prefix', '', on_change=None)
    fn_list = []
    has_events = False
    has_se_list = False
    for url in ds_config.get_option_list():
        if url == 'se list':
            has_se_list = True
        elif url == 'events':
            has_events = True
        elif url not in ['dataset hash', 'metadata', 'metadata common', 'nickname', 'prefix']:
            fi = self._read_fi(ds_config, url, metadata_name_list, common_metadata, common_prefix)
            fn_list.append(fi)
    if not fn_list:
        raise DatasetError('There are no dataset files specified for dataset %r' % dataset_expr)
    result = {
        DataProvider.Nickname: ds_config.get('nickname', dataset_nick or '', on_change=None),
        DataProvider.FileList: sorted(fn_list, key=lambda fi: fi[DataProvider.URL])
    }
    result.update(DataProvider.parse_block_id(dataset_expr))
    if metadata_name_list:
        result[DataProvider.Metadata] = metadata_name_list
    if has_events:
        result[DataProvider.NEntries] = ds_config.get_int('events', -1, on_change=None)
    if has_se_list:
        result[DataProvider.Locations] = parse_list(ds_config.get('se list', '', on_change=None), ',')
    return result