def setupJobParameters(self, config, pm):
    """Configure dataset handling for this task and register the dataset parameter source.

    Reads the 'dataset' config section; if no dataset is configured, dataset
    handling is disabled entirely (dataSplitter/dataRefresh stay None).
    Side effects: mutates config defaults, registers a DataParameterSource in
    the global DataParameterSource.datasetsAvailable registry, and installs a
    SIGUSR2 handler that forces a dataset resync.

    Raises:
        UserError: if the configured dataset splits into zero jobs.
    """
    # Scope the config view to the dataset section, tagged with this object
    config = config.addSections(['dataset']).addTags([self])
    self.dataSplitter = None
    self.dataRefresh = None
    self.dataset = config.get('dataset', '').strip()
    if self.dataset == '':
        # No dataset configured - nothing to set up
        return
    # Provide dataset-aware defaults without overriding explicit user settings
    config.set('se output pattern', '@NICK@_job_@MY_JOBID@_@X@', override = False)
    config.set('default lookup', 'DATASETNICK', override = False)
    defaultProvider = config.get('dataset provider', 'ListProvider')
    dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
    splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
    # Let the provider veto/adjust the chosen splitter class before instantiation
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self.dataSplitter = splitterClass(config)
    self.checkSE = config.getBool('dataset storage check', True, onChange = None)
    # Create and register dataset parameter plugin
    paramSource = DataParameterSource(config.getWorkPath(), 'data', dataProvider, self.dataSplitter, self.initDataProcessor())
    DataParameterSource.datasetsAvailable['data'] = paramSource
    # Select dataset refresh rate
    self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
    if self.dataRefresh > 0:
        # Respect the provider's own query rate limit when choosing the interval
        paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
        utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
    else:
        paramSource.resyncSetup(interval = 0)
    # SIGUSR2 triggers an on-demand dataset resync from outside the process
    def externalRefresh(sig, frame):
        paramSource.resyncSetup(force = True)
    signal.signal(signal.SIGUSR2, externalRefresh)
    if self.dataSplitter.getMaxJobs() == 0:
        raise UserError('There are no events to process')
def tree2expr(self, node):
    """Translate one node of the parsed parameter expression tree into a
    list of parameter sources.

    Leaf handling: plain ints pass through as-is; bare names become
    variable sources. Tuple nodes are (operator, operands) pairs for
    'lookup', 'ref' and the combination operators '*', '+' and ','.

    Raises:
        APIError: on an unknown reference type or operator token.
    """
    # Leaf: a literal repetition count
    if isinstance(node, int):
        return [node]
    # Leaf: a bare variable name
    if not isinstance(node, tuple):
        return self._createVarSource([node], None)
    (op, operands) = node
    if op == 'lookup':
        assert(len(operands) == 2)
        return self._createVarSource(tree2names(operands[0]), tree2names(operands[1]))
    if op == 'ref':
        assert(len(operands) == 1)
        # Default to a dataset reference when the name is a registered dataset
        fallback = 'dataset'
        if operands[0] not in DataParameterSource.datasetsAvailable:
            fallback = 'csv'
        refType = self._paramConfig.get(operands[0], 'type', fallback)
        if refType == 'dataset':
            return [DataParameterSource.create(self._paramConfig, operands[0])]
        if refType == 'csv':
            return [CSVParameterSource.create(self._paramConfig, operands[0])]
        raise APIError('Unknown reference type: "%s"' % refType)
    # Combination operators: recurse into operands, then dispatch on op
    flattened = lchain(imap(self.tree2expr, operands))
    dispatch = {'*': CrossParameterSource, '+': ChainParameterSource, ',': ZipLongParameterSource}
    if op in dispatch:
        return self.combineSources(dispatch[op], flattened)
    raise APIError('Unknown token: "%s"' % op)
def tree2expr(self, node):
    """Convert one node of the parsed parameter expression tree into a list
    of parameter sources.

    Plain ints pass through unchanged; bare names become variable sources;
    tuple nodes are (operator, args) pairs handling 'lookup', 'ref' and the
    combination operators '*', '+' and ','.

    Side effects: lookup switches that need elevation beyond local scope are
    appended to self.elevatedSwitch instead of being instantiated here.

    Raises:
        APIError: on an unknown reference type or operator token.
    """
    def tree2names(node):  # return list of referenced variable names in tree
        if isinstance(node, tuple):
            result = []
            for op_args in node[1:]:
                for arg in op_args:
                    result.extend(tree2names(arg))
            return result
        else:
            return [node]

    def createVarSource(var_list, lookup_list):  # create variable source
        psource_list = []
        for (doElevate, PSourceClass, args) in createLookupHelper(self.paramConfig, var_list, lookup_list):
            if doElevate:  # switch needs elevation beyond local scope
                self.elevatedSwitch.append((PSourceClass, args))
            else:
                psource_list.append(PSourceClass(*args))
        # Optimize away unnecessary cross operations.
        # BUGFIX: len(filter(...)) raises TypeError on Python 3 (filter returns
        # an iterator); count bounded sources with a list comprehension instead,
        # and compare against None by identity (matches _getUserSource).
        if len([p for p in psource_list if p.getMaxParameters() is not None]) > 1:
            return [CrossParameterSource(*psource_list)]
        return psource_list  # simply forward list of psources

    if isinstance(node, tuple):
        (operator, args) = node
        if operator == 'lookup':
            assert(len(args) == 2)
            return createVarSource(tree2names(args[0]), tree2names(args[1]))
        elif operator == 'ref':
            assert(len(args) == 1)
            # Prefer a dataset reference when the name is a registered dataset
            refTypeDefault = 'dataset'
            if args[0] not in DataParameterSource.datasetsAvailable:
                refTypeDefault = 'csv'
            refType = self.paramConfig.get(args[0], 'type', refTypeDefault)
            if refType == 'dataset':
                return [DataParameterSource.create(self.paramConfig, args[0])]
            elif refType == 'csv':
                return [CSVParameterSource.create(self.paramConfig, args[0])]
            raise APIError('Unknown reference type: "%s"' % refType)
        else:
            # Recurse into operands and flatten the resulting source lists
            args_complete = []
            for expr in args:
                args_complete.extend(self.tree2expr(expr))
            if operator == '*':
                return self.combineSources(CrossParameterSource, args_complete)
            elif operator == '+':
                return self.combineSources(ChainParameterSource, args_complete)
            elif operator == ',':
                return self.combineSources(ZipLongParameterSource, args_complete)
            raise APIError('Unknown token: "%s"' % operator)
    elif isinstance(node, int):
        return [node]
    else:
        return createVarSource([node], None)
def _getUserSource(self, pExpr, parent):
    """Parse the user parameter expression string into a single parameter source.

    Pipeline: tokenize -> insert implicit inline operators -> build operator
    tree -> translate to parameter sources -> combine into one source ->
    re-apply elevated lookup switches collected during translation.

    Args:
        pExpr: user-supplied parameter expression string.
        parent: optional parameter source appended to the combination
            (presumably the enclosing/default source - TODO confirm).

    Returns:
        A single combined parameter source object.
    """
    # Operator characters plus grouping brackets form the token alphabet
    tokens = tokenize(pExpr, lchain([self.precedence.keys(), list('()[]<>')]))
    tokens = list(tok2inlinetok(tokens, list(self.precedence.keys())))
    utils.vprint('Parsing parameter string: "%s"' % str.join(' ', imap(str, tokens)), 0)
    tree = tok2tree(tokens, self.precedence)
    source_list = self.tree2expr(tree)
    # Datasets registered but never referenced in the expression are prepended
    if DataParameterSource.datasetsAvailable and not DataParameterSource.datasetsUsed:
        source_list.insert(0, DataParameterSource.create())
    if parent:
        source_list.append(parent)
    # Cross only when more than one source is bounded; otherwise zip suffices
    if len(lfilter(lambda p: p.getMaxParameters() is not None, source_list)) > 1:
        source = self.combineSources(CrossParameterSource, source_list)
    else:
        source = self.combineSources(ZipLongParameterSource, source_list)  # zip more efficient
    assert(len(source) == 1)
    source = source[0]
    # Wrap with lookup switches that had to be elevated beyond local scope
    for (PSourceClass, args) in self.elevatedSwitch:
        source = PSourceClass(source, *args)
    utils.vprint('Parsing output: %r' % source, 0)
    return source
def getSource(self, config):
    """Build the final parameter source and wrap it in the configured adapter.

    Starts from the raw user-defined source seeded with a random-number
    source; any registered-but-unused dataset sources are crossed in
    implicitly before the adapter is applied.
    """
    base = self._getRawSource(RNGParameterSource())
    # Datasets that were registered but never referenced still participate
    datasets_pending = DataParameterSource.datasetsAvailable and not DataParameterSource.datasetsUsed
    if datasets_pending:
        base = CrossParameterSource(DataParameterSource.create(), base)
    return ParameterAdapter.getInstance(self.adapter, config, base)